From 0dce9e1379ea867c9a00c8e6cf1d42badb52601d Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 30 Oct 2018 23:55:43 +1100 Subject: Write some tests for the email pusher (#4095) --- tests/server.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tests/server.py') diff --git a/tests/server.py b/tests/server.py index 7bee58dff1..819c854448 100644 --- a/tests/server.py +++ b/tests/server.py @@ -125,7 +125,9 @@ def make_request(method, path, content=b"", access_token=None, request=SynapseRe req.content = BytesIO(content) if access_token: - req.requestHeaders.addRawHeader(b"Authorization", b"Bearer " + access_token) + req.requestHeaders.addRawHeader( + b"Authorization", b"Bearer " + access_token.encode('ascii') + ) if content: req.requestHeaders.addRawHeader(b"Content-Type", b"application/json") -- cgit 1.5.1 From cb7a6b2379e0e0a4ba8043da98e376b45d05b977 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Sat, 3 Nov 2018 00:19:23 +1100 Subject: Fix typing being reset causing infinite syncs (#4127) --- changelog.d/4127.bugfix | 1 + synapse/app/synchrotron.py | 14 ++++ synapse/handlers/typing.py | 14 ++-- tests/rest/client/v2_alpha/test_sync.py | 123 ++++++++++++++++++++++++++++++++ tests/server.py | 8 ++- 5 files changed, 155 insertions(+), 5 deletions(-) create mode 100644 changelog.d/4127.bugfix (limited to 'tests/server.py') diff --git a/changelog.d/4127.bugfix b/changelog.d/4127.bugfix new file mode 100644 index 0000000000..0701d2ceaa --- /dev/null +++ b/changelog.d/4127.bugfix @@ -0,0 +1 @@ +If the typing stream ID goes backwards (as on a worker when the master restarts), the worker's typing handler will no longer erroneously report rooms containing new typing events. diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index 3926c7f263..0354e82bf8 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -226,7 +226,15 @@ class SynchrotronPresence(object): class SynchrotronTyping(object): def __init__(self, hs): self._latest_room_serial = 0 + self._reset() + + def _reset(self): + """ + Reset the typing handler's data caches. + """ + # map room IDs to serial numbers self._room_serials = {} + # map room IDs to sets of users currently typing self._room_typing = {} def stream_positions(self): @@ -236,6 +244,12 @@ class SynchrotronTyping(object): return {"typing": self._latest_room_serial} def process_replication_rows(self, token, rows): + if self._latest_room_serial > token: + # The master has gone backwards. To prevent inconsistent data, just + # clear everything. + self._reset() + + # Set the latest serial token to whatever the server gave us. self._latest_room_serial = token for row in rows: diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index c610933dd4..a61bbf9392 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -63,11 +63,8 @@ class TypingHandler(object): self._member_typing_until = {} # clock time we expect to stop self._member_last_federation_poke = {} - # map room IDs to serial numbers - self._room_serials = {} self._latest_room_serial = 0 - # map room IDs to sets of users currently typing - self._room_typing = {} + self._reset() # caches which room_ids changed at which serials self._typing_stream_change_cache = StreamChangeCache( @@ -79,6 +76,15 @@ class TypingHandler(object): 5000, ) + def _reset(self): + """ + Reset the typing handler's data caches. 
+ """ + # map room IDs to serial numbers + self._room_serials = {} + # map room IDs to sets of users currently typing + self._room_typing = {} + def _handle_timeouts(self): logger.info("Checking for typing timeouts") diff --git a/tests/rest/client/v2_alpha/test_sync.py b/tests/rest/client/v2_alpha/test_sync.py index 4c30c5f258..99b716f00a 100644 --- a/tests/rest/client/v2_alpha/test_sync.py +++ b/tests/rest/client/v2_alpha/test_sync.py @@ -15,9 +15,11 @@ from mock import Mock +from synapse.rest.client.v1 import admin, login, room from synapse.rest.client.v2_alpha import sync from tests import unittest +from tests.server import TimedOutException class FilterTestCase(unittest.HomeserverTestCase): @@ -65,3 +67,124 @@ class FilterTestCase(unittest.HomeserverTestCase): ["next_batch", "rooms", "account_data", "to_device", "device_lists"] ).issubset(set(channel.json_body.keys())) ) + + +class SyncTypingTests(unittest.HomeserverTestCase): + + servlets = [ + admin.register_servlets, + room.register_servlets, + login.register_servlets, + sync.register_servlets, + ] + user_id = True + hijack_auth = False + + def test_sync_backwards_typing(self): + """ + If the typing serial goes backwards and the typing handler is then reset + (such as when the master restarts and sets the typing serial to 0), we + do not incorrectly return typing information that had a serial greater + than the now-reset serial. + """ + typing_url = "/rooms/%s/typing/%s?access_token=%s" + sync_url = "/sync?timeout=3000000&access_token=%s&since=%s" + + # Register the user who gets notified + user_id = self.register_user("user", "pass") + access_token = self.login("user", "pass") + + # Register the user who sends the message + other_user_id = self.register_user("otheruser", "pass") + other_access_token = self.login("otheruser", "pass") + + # Create a room + room = self.helper.create_room_as(user_id, tok=access_token) + + # Invite the other person + self.helper.invite(room=room, src=user_id, tok=access_token, targ=other_user_id) + + # The other user joins + self.helper.join(room=room, user=other_user_id, tok=other_access_token) + + # The other user sends some messages + self.helper.send(room, body="Hi!", tok=other_access_token) + self.helper.send(room, body="There!", tok=other_access_token) + + # Start typing. + request, channel = self.make_request( + "PUT", + typing_url % (room, other_user_id, other_access_token), + b'{"typing": true, "timeout": 30000}', + ) + self.render(request) + self.assertEquals(200, channel.code) + + request, channel = self.make_request( + "GET", "/sync?access_token=%s" % (access_token,) + ) + self.render(request) + self.assertEquals(200, channel.code) + next_batch = channel.json_body["next_batch"] + + # Stop typing. + request, channel = self.make_request( + "PUT", + typing_url % (room, other_user_id, other_access_token), + b'{"typing": false}', + ) + self.render(request) + self.assertEquals(200, channel.code) + + # Start typing. + request, channel = self.make_request( + "PUT", + typing_url % (room, other_user_id, other_access_token), + b'{"typing": true, "timeout": 30000}', + ) + self.render(request) + self.assertEquals(200, channel.code) + + # Should return immediately + request, channel = self.make_request( + "GET", sync_url % (access_token, next_batch) + ) + self.render(request) + self.assertEquals(200, channel.code) + next_batch = channel.json_body["next_batch"] + + # Reset typing serial back to 0, as if the master had. 
+ typing = self.hs.get_typing_handler() + typing._latest_room_serial = 0 + + # Since it checks the state token, we need some state to update to + # invalidate the stream token. + self.helper.send(room, body="There!", tok=other_access_token) + + request, channel = self.make_request( + "GET", sync_url % (access_token, next_batch) + ) + self.render(request) + self.assertEquals(200, channel.code) + next_batch = channel.json_body["next_batch"] + + # This should time out! But it does not, because our stream token is + # ahead, and therefore it's saying the typing (that we've actually + # already seen) is new, since it's got a token above our new, now-reset + # stream token. + request, channel = self.make_request( + "GET", sync_url % (access_token, next_batch) + ) + self.render(request) + self.assertEquals(200, channel.code) + next_batch = channel.json_body["next_batch"] + + # Clear the typing information, so that it doesn't think everything is + # in the future. + typing._reset() + + # Now it SHOULD fail as it never completes! + request, channel = self.make_request( + "GET", sync_url % (access_token, next_batch) + ) + self.assertRaises(TimedOutException, self.render, request) diff --git a/tests/server.py b/tests/server.py index 819c854448..cc6dbe04ac 100644 --- a/tests/server.py +++ b/tests/server.py @@ -21,6 +21,12 @@ from synapse.util import Clock from tests.utils import setup_test_homeserver as _sth +class TimedOutException(Exception): + """ + A web query timed out. + """ + + @attr.s class FakeChannel(object): """ @@ -153,7 +159,7 @@ def wait_until_result(clock, request, timeout=100): x += 1 if x > timeout: - raise Exception("Timed out waiting for request to finish.") + raise TimedOutException("Timed out waiting for request to finish.") clock.advance(0.1) -- cgit 1.5.1 From efdcbbe46bfe39f0dd3ef508bb08c37326892adc Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 6 Nov 2018 05:53:44 +1100 Subject: Tests for user consent resource (#4140) --- changelog.d/4140.bugfix | 1 + synapse/rest/consent/consent_resource.py | 2 +- tests/rest/client/test_consent.py | 111 +++++++++++++++++++++++++++++++ tests/server.py | 20 +++++- tests/unittest.py | 12 +++- 5 files changed, 140 insertions(+), 6 deletions(-) create mode 100644 changelog.d/4140.bugfix create mode 100644 tests/rest/client/test_consent.py (limited to 'tests/server.py') diff --git a/changelog.d/4140.bugfix b/changelog.d/4140.bugfix new file mode 100644 index 0000000000..c7e0ee229d --- /dev/null +++ b/changelog.d/4140.bugfix @@ -0,0 +1 @@ +Generating the user consent URI no longer fails on Python 3. diff --git a/synapse/rest/consent/consent_resource.py b/synapse/rest/consent/consent_resource.py index 89b82b0591..c85e84b465 100644 --- a/synapse/rest/consent/consent_resource.py +++ b/synapse/rest/consent/consent_resource.py @@ -227,7 +227,7 @@ class ConsentResource(Resource): key=self._hmac_secret, msg=userid.encode('utf-8'), digestmod=sha256, - ).hexdigest() + ).hexdigest().encode('ascii') if not compare_digest(want_mac, userhmac): raise SynapseError(http_client.FORBIDDEN, "HMAC incorrect") diff --git a/tests/rest/client/test_consent.py b/tests/rest/client/test_consent.py new file mode 100644 index 0000000000..df3f1cde6e --- /dev/null +++ b/tests/rest/client/test_consent.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from synapse.api.urls import ConsentURIBuilder +from synapse.rest.client.v1 import admin, login, room +from synapse.rest.consent import consent_resource + +from tests import unittest +from tests.server import render + +try: + from synapse.push.mailer import load_jinja2_templates +except Exception: + load_jinja2_templates = None + + +class ConsentResourceTestCase(unittest.HomeserverTestCase): + skip = "No Jinja installed" if not load_jinja2_templates else None + servlets = [ + admin.register_servlets, + room.register_servlets, + login.register_servlets, + ] + user_id = True + hijack_auth = False + + def make_homeserver(self, reactor, clock): + + config = self.default_config() + config.user_consent_version = "1" + config.public_baseurl = "" + config.form_secret = "123abc" + + # Make some temporary templates... + temp_consent_path = self.mktemp() + os.mkdir(temp_consent_path) + os.mkdir(os.path.join(temp_consent_path, 'en')) + config.user_consent_template_dir = os.path.abspath(temp_consent_path) + + with open(os.path.join(temp_consent_path, "en/1.html"), 'w') as f: + f.write("{{version}},{{has_consented}}") + + with open(os.path.join(temp_consent_path, "en/success.html"), 'w') as f: + f.write("yay!") + + hs = self.setup_test_homeserver(config=config) + return hs + + def test_accept_consent(self): + """ + A user can use the consent form to accept the terms. + """ + uri_builder = ConsentURIBuilder(self.hs.config) + resource = consent_resource.ConsentResource(self.hs) + + # Register a user + user_id = self.register_user("user", "pass") + access_token = self.login("user", "pass") + + # Fetch the consent page, to get the consent version + consent_uri = ( + uri_builder.build_user_consent_uri(user_id).replace("_matrix/", "") + + "&u=user" + ) + request, channel = self.make_request( + "GET", consent_uri, access_token=access_token, shorthand=False + ) + render(request, resource, self.reactor) + self.assertEqual(channel.code, 200) + + # Get the version from the body, and whether we've consented + version, consented = channel.result["body"].decode('ascii').split(",") + self.assertEqual(consented, "False") + + # POST to the consent page, saying we've agreed + request, channel = self.make_request( + "POST", + consent_uri + "&v=" + version, + access_token=access_token, + shorthand=False, + ) + render(request, resource, self.reactor) + self.assertEqual(channel.code, 200) + + # Fetch the consent page, to get the consent version -- it should have + # changed + request, channel = self.make_request( + "GET", consent_uri, access_token=access_token, shorthand=False + ) + render(request, resource, self.reactor) + self.assertEqual(channel.code, 200) + + # Get the version from the body, and check that it's the version we + # agreed to, and that we've consented to it. 
+ version, consented = channel.result["body"].decode('ascii').split(",") + self.assertEqual(consented, "True") + self.assertEqual(version, "1") diff --git a/tests/server.py b/tests/server.py index cc6dbe04ac..f63f33c94f 100644 --- a/tests/server.py +++ b/tests/server.py @@ -104,10 +104,24 @@ class FakeSite: return FakeLogger() -def make_request(method, path, content=b"", access_token=None, request=SynapseRequest): +def make_request( + method, path, content=b"", access_token=None, request=SynapseRequest, shorthand=True +): """ Make a web request using the given method and path, feed it the content, and return the Request and the Channel underneath. + + Args: + method (bytes/unicode): The HTTP request method ("verb"). + path (bytes/unicode): The HTTP path, suitably URL encoded (e.g. + escaped UTF-8 & spaces and such). + content (bytes or dict): The body of the request. JSON-encoded, if + a dict. + shorthand: Whether to try and be helpful and prefix the given URL + with the usual REST API path, if it doesn't contain it. + + Returns: + A synapse.http.site.SynapseRequest. """ if not isinstance(method, bytes): method = method.encode('ascii') @@ -115,8 +129,8 @@ def make_request(method, path, content=b"", access_token=None, request=SynapseRe if not isinstance(path, bytes): path = path.encode('ascii') - # Decorate it to be the full path - if not path.startswith(b"/_matrix"): + # Decorate it to be the full path, if we're using shorthand + if shorthand and not path.startswith(b"/_matrix"): path = b"/_matrix/client/r0/" + path path = path.replace(b"//", b"/") diff --git a/tests/unittest.py b/tests/unittest.py index 4d40bdb6a5..5e35c943d7 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -258,7 +258,13 @@ class HomeserverTestCase(TestCase): """ def make_request( - self, method, path, content=b"", access_token=None, request=SynapseRequest + self, + method, + path, + content=b"", + access_token=None, + request=SynapseRequest, + shorthand=True, ): """ Create a SynapseRequest at the path using the method and containing the @@ -270,6 +276,8 @@ class HomeserverTestCase(TestCase): escaped UTF-8 & spaces and such). content (bytes or dict): The body of the request. JSON-encoded, if a dict. + shorthand: Whether to try and be helpful and prefix the given URL + with the usual REST API path, if it doesn't contain it. Returns: A synapse.http.site.SynapseRequest. 
@@ -277,7 +285,7 @@ class HomeserverTestCase(TestCase): if isinstance(content, dict): content = json.dumps(content).encode('utf8') - return make_request(method, path, content, access_token, request) + return make_request(method, path, content, access_token, request, shorthand) def render(self, request): """ -- cgit 1.5.1 From e62f7f17b32fa51a818fdcfc756464692ddb9194 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 7 Nov 2018 03:00:00 +1100 Subject: Remove some boilerplate in tests (#4156) --- changelog.d/4156.misc | 1 + tests/rest/client/v1/test_admin.py | 116 +++++++++++++--------------- tests/rest/client/v1/test_register.py | 10 +-- tests/rest/client/v1/utils.py | 22 ++---- tests/rest/client/v2_alpha/test_filter.py | 95 +++++++---------------- tests/rest/client/v2_alpha/test_register.py | 52 ++++++------- tests/server.py | 20 ++++- tests/test_mau.py | 35 +++------ tests/test_server.py | 12 +-- tests/test_terms_auth.py | 5 +- tests/unittest.py | 10 ++- 11 files changed, 162 insertions(+), 216 deletions(-) create mode 100644 changelog.d/4156.misc (limited to 'tests/server.py') diff --git a/changelog.d/4156.misc b/changelog.d/4156.misc new file mode 100644 index 0000000000..20d404406c --- /dev/null +++ b/changelog.d/4156.misc @@ -0,0 +1 @@ +HTTP tests have been refactored to contain less boilerplate. diff --git a/tests/rest/client/v1/test_admin.py b/tests/rest/client/v1/test_admin.py index 1a553fa3f9..e38eb628a9 100644 --- a/tests/rest/client/v1/test_admin.py +++ b/tests/rest/client/v1/test_admin.py @@ -19,24 +19,17 @@ import json from mock import Mock -from synapse.http.server import JsonResource from synapse.rest.client.v1.admin import register_servlets -from synapse.util import Clock from tests import unittest -from tests.server import ( - ThreadedMemoryReactorClock, - make_request, - render, - setup_test_homeserver, -) -class UserRegisterTestCase(unittest.TestCase): - def setUp(self): +class UserRegisterTestCase(unittest.HomeserverTestCase): + + servlets = [register_servlets] + + def make_homeserver(self, reactor, clock): - self.clock = ThreadedMemoryReactorClock() - self.hs_clock = Clock(self.clock) self.url = "/_matrix/client/r0/admin/register" self.registration_handler = Mock() @@ -50,17 +43,14 @@ class UserRegisterTestCase(unittest.TestCase): self.secrets = Mock() - self.hs = setup_test_homeserver( - self.addCleanup, http_client=None, clock=self.hs_clock, reactor=self.clock - ) + self.hs = self.setup_test_homeserver() self.hs.config.registration_shared_secret = u"shared" self.hs.get_media_repository = Mock() self.hs.get_deactivate_account_handler = Mock() - self.resource = JsonResource(self.hs) - register_servlets(self.hs, self.resource) + return self.hs def test_disabled(self): """ @@ -69,8 +59,8 @@ class UserRegisterTestCase(unittest.TestCase): """ self.hs.config.registration_shared_secret = None - request, channel = make_request("POST", self.url, b'{}') - render(request, self.resource, self.clock) + request, channel = self.make_request("POST", self.url, b'{}') + self.render(request) self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual( @@ -87,8 +77,8 @@ class UserRegisterTestCase(unittest.TestCase): self.hs.get_secrets = Mock(return_value=secrets) - request, channel = make_request("GET", self.url) - render(request, self.resource, self.clock) + request, channel = self.make_request("GET", self.url) + self.render(request) self.assertEqual(channel.json_body, {"nonce": "abcd"}) @@ -97,25 +87,25 @@ class 
UserRegisterTestCase(unittest.TestCase): Calling GET on the endpoint will return a randomised nonce, which will only last for SALT_TIMEOUT (60s). """ - request, channel = make_request("GET", self.url) - render(request, self.resource, self.clock) + request, channel = self.make_request("GET", self.url) + self.render(request) nonce = channel.json_body["nonce"] # 59 seconds - self.clock.advance(59) + self.reactor.advance(59) body = json.dumps({"nonce": nonce}) - request, channel = make_request("POST", self.url, body.encode('utf8')) - render(request, self.resource, self.clock) + request, channel = self.make_request("POST", self.url, body.encode('utf8')) + self.render(request) self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual('username must be specified', channel.json_body["error"]) # 61 seconds - self.clock.advance(2) + self.reactor.advance(2) - request, channel = make_request("POST", self.url, body.encode('utf8')) - render(request, self.resource, self.clock) + request, channel = self.make_request("POST", self.url, body.encode('utf8')) + self.render(request) self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual('unrecognised nonce', channel.json_body["error"]) @@ -124,8 +114,8 @@ class UserRegisterTestCase(unittest.TestCase): """ Only the provided nonce can be used, as it's checked in the MAC. """ - request, channel = make_request("GET", self.url) - render(request, self.resource, self.clock) + request, channel = self.make_request("GET", self.url) + self.render(request) nonce = channel.json_body["nonce"] want_mac = hmac.new(key=b"shared", digestmod=hashlib.sha1) @@ -141,8 +131,8 @@ class UserRegisterTestCase(unittest.TestCase): "mac": want_mac, } ) - request, channel = make_request("POST", self.url, body.encode('utf8')) - render(request, self.resource, self.clock) + request, channel = self.make_request("POST", self.url, body.encode('utf8')) + self.render(request) self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual("HMAC incorrect", channel.json_body["error"]) @@ -152,8 +142,8 @@ class UserRegisterTestCase(unittest.TestCase): When the correct nonce is provided, and the right key is provided, the user is registered. """ - request, channel = make_request("GET", self.url) - render(request, self.resource, self.clock) + request, channel = self.make_request("GET", self.url) + self.render(request) nonce = channel.json_body["nonce"] want_mac = hmac.new(key=b"shared", digestmod=hashlib.sha1) @@ -169,8 +159,8 @@ class UserRegisterTestCase(unittest.TestCase): "mac": want_mac, } ) - request, channel = make_request("POST", self.url, body.encode('utf8')) - render(request, self.resource, self.clock) + request, channel = self.make_request("POST", self.url, body.encode('utf8')) + self.render(request) self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual("@bob:test", channel.json_body["user_id"]) @@ -179,8 +169,8 @@ class UserRegisterTestCase(unittest.TestCase): """ A valid unrecognised nonce. 
""" - request, channel = make_request("GET", self.url) - render(request, self.resource, self.clock) + request, channel = self.make_request("GET", self.url) + self.render(request) nonce = channel.json_body["nonce"] want_mac = hmac.new(key=b"shared", digestmod=hashlib.sha1) @@ -196,15 +186,15 @@ class UserRegisterTestCase(unittest.TestCase): "mac": want_mac, } ) - request, channel = make_request("POST", self.url, body.encode('utf8')) - render(request, self.resource, self.clock) + request, channel = self.make_request("POST", self.url, body.encode('utf8')) + self.render(request) self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual("@bob:test", channel.json_body["user_id"]) # Now, try and reuse it - request, channel = make_request("POST", self.url, body.encode('utf8')) - render(request, self.resource, self.clock) + request, channel = self.make_request("POST", self.url, body.encode('utf8')) + self.render(request) self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual('unrecognised nonce', channel.json_body["error"]) @@ -217,8 +207,8 @@ class UserRegisterTestCase(unittest.TestCase): """ def nonce(): - request, channel = make_request("GET", self.url) - render(request, self.resource, self.clock) + request, channel = self.make_request("GET", self.url) + self.render(request) return channel.json_body["nonce"] # @@ -227,8 +217,8 @@ class UserRegisterTestCase(unittest.TestCase): # Must be present body = json.dumps({}) - request, channel = make_request("POST", self.url, body.encode('utf8')) - render(request, self.resource, self.clock) + request, channel = self.make_request("POST", self.url, body.encode('utf8')) + self.render(request) self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual('nonce must be specified', channel.json_body["error"]) @@ -239,32 +229,32 @@ class UserRegisterTestCase(unittest.TestCase): # Must be present body = json.dumps({"nonce": nonce()}) - request, channel = make_request("POST", self.url, body.encode('utf8')) - render(request, self.resource, self.clock) + request, channel = self.make_request("POST", self.url, body.encode('utf8')) + self.render(request) self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual('username must be specified', channel.json_body["error"]) # Must be a string body = json.dumps({"nonce": nonce(), "username": 1234}) - request, channel = make_request("POST", self.url, body.encode('utf8')) - render(request, self.resource, self.clock) + request, channel = self.make_request("POST", self.url, body.encode('utf8')) + self.render(request) self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual('Invalid username', channel.json_body["error"]) # Must not have null bytes body = json.dumps({"nonce": nonce(), "username": u"abcd\u0000"}) - request, channel = make_request("POST", self.url, body.encode('utf8')) - render(request, self.resource, self.clock) + request, channel = self.make_request("POST", self.url, body.encode('utf8')) + self.render(request) self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual('Invalid username', channel.json_body["error"]) # Must not have null bytes body = json.dumps({"nonce": nonce(), "username": "a" * 1000}) - request, channel = make_request("POST", self.url, body.encode('utf8')) - render(request, self.resource, self.clock) + request, channel = self.make_request("POST", self.url, 
body.encode('utf8')) + self.render(request) self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual('Invalid username', channel.json_body["error"]) @@ -275,16 +265,16 @@ class UserRegisterTestCase(unittest.TestCase): # Must be present body = json.dumps({"nonce": nonce(), "username": "a"}) - request, channel = make_request("POST", self.url, body.encode('utf8')) - render(request, self.resource, self.clock) + request, channel = self.make_request("POST", self.url, body.encode('utf8')) + self.render(request) self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual('password must be specified', channel.json_body["error"]) # Must be a string body = json.dumps({"nonce": nonce(), "username": "a", "password": 1234}) - request, channel = make_request("POST", self.url, body.encode('utf8')) - render(request, self.resource, self.clock) + request, channel = self.make_request("POST", self.url, body.encode('utf8')) + self.render(request) self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual('Invalid password', channel.json_body["error"]) @@ -293,16 +283,16 @@ class UserRegisterTestCase(unittest.TestCase): body = json.dumps( {"nonce": nonce(), "username": "a", "password": u"abcd\u0000"} ) - request, channel = make_request("POST", self.url, body.encode('utf8')) - render(request, self.resource, self.clock) + request, channel = self.make_request("POST", self.url, body.encode('utf8')) + self.render(request) self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual('Invalid password', channel.json_body["error"]) # Super long body = json.dumps({"nonce": nonce(), "username": "a", "password": "A" * 1000}) - request, channel = make_request("POST", self.url, body.encode('utf8')) - render(request, self.resource, self.clock) + request, channel = self.make_request("POST", self.url, body.encode('utf8')) + self.render(request) self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual('Invalid password', channel.json_body["error"]) diff --git a/tests/rest/client/v1/test_register.py b/tests/rest/client/v1/test_register.py index 6b7ff813d5..f973eff8cf 100644 --- a/tests/rest/client/v1/test_register.py +++ b/tests/rest/client/v1/test_register.py @@ -45,11 +45,11 @@ class CreateUserServletTestCase(unittest.TestCase): ) handlers = Mock(registration_handler=self.registration_handler) - self.clock = MemoryReactorClock() - self.hs_clock = Clock(self.clock) + self.reactor = MemoryReactorClock() + self.hs_clock = Clock(self.reactor) self.hs = self.hs = setup_test_homeserver( - self.addCleanup, http_client=None, clock=self.hs_clock, reactor=self.clock + self.addCleanup, http_client=None, clock=self.hs_clock, reactor=self.reactor ) self.hs.get_datastore = Mock(return_value=self.datastore) self.hs.get_handlers = Mock(return_value=handlers) @@ -76,8 +76,8 @@ class CreateUserServletTestCase(unittest.TestCase): return_value=(user_id, token) ) - request, channel = make_request(b"POST", url, request_data) - render(request, res, self.clock) + request, channel = make_request(self.reactor, b"POST", url, request_data) + render(request, res, self.reactor) self.assertEquals(channel.result["code"], b"200") diff --git a/tests/rest/client/v1/utils.py b/tests/rest/client/v1/utils.py index 530dc8ba6d..9c401bf300 100644 --- a/tests/rest/client/v1/utils.py +++ b/tests/rest/client/v1/utils.py @@ -169,7 +169,7 @@ class RestHelper(object): path = path + 
"?access_token=%s" % tok request, channel = make_request( - "POST", path, json.dumps(content).encode('utf8') + self.hs.get_reactor(), "POST", path, json.dumps(content).encode('utf8') ) render(request, self.resource, self.hs.get_reactor()) @@ -217,7 +217,9 @@ class RestHelper(object): data = {"membership": membership} - request, channel = make_request("PUT", path, json.dumps(data).encode('utf8')) + request, channel = make_request( + self.hs.get_reactor(), "PUT", path, json.dumps(data).encode('utf8') + ) render(request, self.resource, self.hs.get_reactor()) @@ -228,18 +230,6 @@ class RestHelper(object): self.auth_user_id = temp_id - @defer.inlineCallbacks - def register(self, user_id): - (code, response) = yield self.mock_resource.trigger( - "POST", - "/_matrix/client/r0/register", - json.dumps( - {"user": user_id, "password": "test", "type": "m.login.password"} - ), - ) - self.assertEquals(200, code) - defer.returnValue(response) - def send(self, room_id, body=None, txn_id=None, tok=None, expect_code=200): if txn_id is None: txn_id = "m%s" % (str(time.time())) @@ -251,7 +241,9 @@ class RestHelper(object): if tok: path = path + "?access_token=%s" % tok - request, channel = make_request("PUT", path, json.dumps(content).encode('utf8')) + request, channel = make_request( + self.hs.get_reactor(), "PUT", path, json.dumps(content).encode('utf8') + ) render(request, self.resource, self.hs.get_reactor()) assert int(channel.result["code"]) == expect_code, ( diff --git a/tests/rest/client/v2_alpha/test_filter.py b/tests/rest/client/v2_alpha/test_filter.py index 6a886ee3b8..f42a8efbf4 100644 --- a/tests/rest/client/v2_alpha/test_filter.py +++ b/tests/rest/client/v2_alpha/test_filter.py @@ -13,84 +13,47 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import synapse.types from synapse.api.errors import Codes -from synapse.http.server import JsonResource from synapse.rest.client.v2_alpha import filter -from synapse.types import UserID -from synapse.util import Clock from tests import unittest -from tests.server import ( - ThreadedMemoryReactorClock as MemoryReactorClock, - make_request, - render, - setup_test_homeserver, -) PATH_PREFIX = "/_matrix/client/v2_alpha" -class FilterTestCase(unittest.TestCase): +class FilterTestCase(unittest.HomeserverTestCase): - USER_ID = "@apple:test" + user_id = "@apple:test" + hijack_auth = True EXAMPLE_FILTER = {"room": {"timeline": {"types": ["m.room.message"]}}} EXAMPLE_FILTER_JSON = b'{"room": {"timeline": {"types": ["m.room.message"]}}}' - TO_REGISTER = [filter] + servlets = [filter.register_servlets] - def setUp(self): - self.clock = MemoryReactorClock() - self.hs_clock = Clock(self.clock) - - self.hs = setup_test_homeserver( - self.addCleanup, http_client=None, clock=self.hs_clock, reactor=self.clock - ) - - self.auth = self.hs.get_auth() - - def get_user_by_access_token(token=None, allow_guest=False): - return { - "user": UserID.from_string(self.USER_ID), - "token_id": 1, - "is_guest": False, - } - - def get_user_by_req(request, allow_guest=False, rights="access"): - return synapse.types.create_requester( - UserID.from_string(self.USER_ID), 1, False, None - ) - - self.auth.get_user_by_access_token = get_user_by_access_token - self.auth.get_user_by_req = get_user_by_req - - self.store = self.hs.get_datastore() - self.filtering = self.hs.get_filtering() - self.resource = JsonResource(self.hs) - - for r in self.TO_REGISTER: - r.register_servlets(self.hs, self.resource) + def prepare(self, reactor, clock, hs): + self.filtering = hs.get_filtering() + self.store = hs.get_datastore() def test_add_filter(self): - request, channel = make_request( + request, channel = self.make_request( "POST", - "/_matrix/client/r0/user/%s/filter" % (self.USER_ID), + "/_matrix/client/r0/user/%s/filter" % (self.user_id), self.EXAMPLE_FILTER_JSON, ) - render(request, self.resource, self.clock) + self.render(request) self.assertEqual(channel.result["code"], b"200") self.assertEqual(channel.json_body, {"filter_id": "0"}) filter = self.store.get_user_filter(user_localpart="apple", filter_id=0) - self.clock.advance(0) + self.pump() self.assertEquals(filter.result, self.EXAMPLE_FILTER) def test_add_filter_for_other_user(self): - request, channel = make_request( + request, channel = self.make_request( "POST", "/_matrix/client/r0/user/%s/filter" % ("@watermelon:test"), self.EXAMPLE_FILTER_JSON, ) - render(request, self.resource, self.clock) + self.render(request) self.assertEqual(channel.result["code"], b"403") self.assertEquals(channel.json_body["errcode"], Codes.FORBIDDEN) @@ -98,12 +61,12 @@ class FilterTestCase(unittest.TestCase): def test_add_filter_non_local_user(self): _is_mine = self.hs.is_mine self.hs.is_mine = lambda target_user: False - request, channel = make_request( + request, channel = self.make_request( "POST", - "/_matrix/client/r0/user/%s/filter" % (self.USER_ID), + "/_matrix/client/r0/user/%s/filter" % (self.user_id), self.EXAMPLE_FILTER_JSON, ) - render(request, self.resource, self.clock) + self.render(request) self.hs.is_mine = _is_mine self.assertEqual(channel.result["code"], b"403") @@ -113,21 +76,21 @@ class FilterTestCase(unittest.TestCase): filter_id = self.filtering.add_user_filter( user_localpart="apple", user_filter=self.EXAMPLE_FILTER ) - self.clock.advance(1) + self.reactor.advance(1) filter_id = 
filter_id.result - request, channel = make_request( - "GET", "/_matrix/client/r0/user/%s/filter/%s" % (self.USER_ID, filter_id) + request, channel = self.make_request( + "GET", "/_matrix/client/r0/user/%s/filter/%s" % (self.user_id, filter_id) ) - render(request, self.resource, self.clock) + self.render(request) self.assertEqual(channel.result["code"], b"200") self.assertEquals(channel.json_body, self.EXAMPLE_FILTER) def test_get_filter_non_existant(self): - request, channel = make_request( - "GET", "/_matrix/client/r0/user/%s/filter/12382148321" % (self.USER_ID) + request, channel = self.make_request( + "GET", "/_matrix/client/r0/user/%s/filter/12382148321" % (self.user_id) ) - render(request, self.resource, self.clock) + self.render(request) self.assertEqual(channel.result["code"], b"400") self.assertEquals(channel.json_body["errcode"], Codes.NOT_FOUND) @@ -135,18 +98,18 @@ class FilterTestCase(unittest.TestCase): # Currently invalid params do not have an appropriate errcode # in errors.py def test_get_filter_invalid_id(self): - request, channel = make_request( - "GET", "/_matrix/client/r0/user/%s/filter/foobar" % (self.USER_ID) + request, channel = self.make_request( + "GET", "/_matrix/client/r0/user/%s/filter/foobar" % (self.user_id) ) - render(request, self.resource, self.clock) + self.render(request) self.assertEqual(channel.result["code"], b"400") # No ID also returns an invalid_id error def test_get_filter_no_id(self): - request, channel = make_request( - "GET", "/_matrix/client/r0/user/%s/filter/" % (self.USER_ID) + request, channel = self.make_request( + "GET", "/_matrix/client/r0/user/%s/filter/" % (self.user_id) ) - render(request, self.resource, self.clock) + self.render(request) self.assertEqual(channel.result["code"], b"400") diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index 1c128e81f5..753d5c3e80 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -3,22 +3,19 @@ import json from mock import Mock from twisted.python import failure -from twisted.test.proto_helpers import MemoryReactorClock from synapse.api.errors import InteractiveAuthIncompleteError -from synapse.http.server import JsonResource from synapse.rest.client.v2_alpha.register import register_servlets -from synapse.util import Clock from tests import unittest -from tests.server import make_request, render, setup_test_homeserver -class RegisterRestServletTestCase(unittest.TestCase): - def setUp(self): +class RegisterRestServletTestCase(unittest.HomeserverTestCase): + + servlets = [register_servlets] + + def make_homeserver(self, reactor, clock): - self.clock = MemoryReactorClock() - self.hs_clock = Clock(self.clock) self.url = b"/_matrix/client/r0/register" self.appservice = None @@ -46,9 +43,7 @@ class RegisterRestServletTestCase(unittest.TestCase): identity_handler=self.identity_handler, login_handler=self.login_handler, ) - self.hs = setup_test_homeserver( - self.addCleanup, http_client=None, clock=self.hs_clock, reactor=self.clock - ) + self.hs = self.setup_test_homeserver() self.hs.get_auth = Mock(return_value=self.auth) self.hs.get_handlers = Mock(return_value=self.handlers) self.hs.get_auth_handler = Mock(return_value=self.auth_handler) @@ -58,8 +53,7 @@ class RegisterRestServletTestCase(unittest.TestCase): self.hs.config.registrations_require_3pid = [] self.hs.config.auto_join_rooms = [] - self.resource = JsonResource(self.hs) - register_servlets(self.hs, self.resource) + return self.hs def 
test_POST_appservice_registration_valid(self): user_id = "@kermit:muppet" @@ -69,10 +63,10 @@ class RegisterRestServletTestCase(unittest.TestCase): self.auth_handler.get_access_token_for_user_id = Mock(return_value=token) request_data = json.dumps({"username": "kermit"}) - request, channel = make_request( + request, channel = self.make_request( b"POST", self.url + b"?access_token=i_am_an_app_service", request_data ) - render(request, self.resource, self.clock) + self.render(request) self.assertEquals(channel.result["code"], b"200", channel.result) det_data = { @@ -85,25 +79,25 @@ class RegisterRestServletTestCase(unittest.TestCase): def test_POST_appservice_registration_invalid(self): self.appservice = None # no application service exists request_data = json.dumps({"username": "kermit"}) - request, channel = make_request( + request, channel = self.make_request( b"POST", self.url + b"?access_token=i_am_an_app_service", request_data ) - render(request, self.resource, self.clock) + self.render(request) self.assertEquals(channel.result["code"], b"401", channel.result) def test_POST_bad_password(self): request_data = json.dumps({"username": "kermit", "password": 666}) - request, channel = make_request(b"POST", self.url, request_data) - render(request, self.resource, self.clock) + request, channel = self.make_request(b"POST", self.url, request_data) + self.render(request) self.assertEquals(channel.result["code"], b"400", channel.result) self.assertEquals(channel.json_body["error"], "Invalid password") def test_POST_bad_username(self): request_data = json.dumps({"username": 777, "password": "monkey"}) - request, channel = make_request(b"POST", self.url, request_data) - render(request, self.resource, self.clock) + request, channel = self.make_request(b"POST", self.url, request_data) + self.render(request) self.assertEquals(channel.result["code"], b"400", channel.result) self.assertEquals(channel.json_body["error"], "Invalid username") @@ -121,8 +115,8 @@ class RegisterRestServletTestCase(unittest.TestCase): self.auth_handler.get_access_token_for_user_id = Mock(return_value=token) self.device_handler.check_device_registered = Mock(return_value=device_id) - request, channel = make_request(b"POST", self.url, request_data) - render(request, self.resource, self.clock) + request, channel = self.make_request(b"POST", self.url, request_data) + self.render(request) det_data = { "user_id": user_id, @@ -143,8 +137,8 @@ class RegisterRestServletTestCase(unittest.TestCase): self.auth_result = (None, {"username": "kermit", "password": "monkey"}, None) self.registration_handler.register = Mock(return_value=("@user:id", "t")) - request, channel = make_request(b"POST", self.url, request_data) - render(request, self.resource, self.clock) + request, channel = self.make_request(b"POST", self.url, request_data) + self.render(request) self.assertEquals(channel.result["code"], b"403", channel.result) self.assertEquals(channel.json_body["error"], "Registration has been disabled") @@ -155,8 +149,8 @@ class RegisterRestServletTestCase(unittest.TestCase): self.hs.config.allow_guest_access = True self.registration_handler.register = Mock(return_value=(user_id, None)) - request, channel = make_request(b"POST", self.url + b"?kind=guest", b"{}") - render(request, self.resource, self.clock) + request, channel = self.make_request(b"POST", self.url + b"?kind=guest", b"{}") + self.render(request) det_data = { "user_id": user_id, @@ -169,8 +163,8 @@ class RegisterRestServletTestCase(unittest.TestCase): def 
test_POST_disabled_guest_registration(self): self.hs.config.allow_guest_access = False - request, channel = make_request(b"POST", self.url + b"?kind=guest", b"{}") - render(request, self.resource, self.clock) + request, channel = self.make_request(b"POST", self.url + b"?kind=guest", b"{}") + self.render(request) self.assertEquals(channel.result["code"], b"403", channel.result) self.assertEquals(channel.json_body["error"], "Guest access is disabled") diff --git a/tests/server.py b/tests/server.py index f63f33c94f..984cfe26d4 100644 --- a/tests/server.py +++ b/tests/server.py @@ -34,6 +34,7 @@ class FakeChannel(object): wire). """ + _reactor = attr.ib() result = attr.ib(default=attr.Factory(dict)) _producer = None @@ -63,6 +64,15 @@ class FakeChannel(object): def registerProducer(self, producer, streaming): self._producer = producer + self.producerStreaming = streaming + + def _produce(): + if self._producer: + self._producer.resumeProducing() + self._reactor.callLater(0.1, _produce) + + if not streaming: + self._reactor.callLater(0.0, _produce) def unregisterProducer(self): if self._producer is None: @@ -105,7 +115,13 @@ class FakeSite: def make_request( - method, path, content=b"", access_token=None, request=SynapseRequest, shorthand=True + reactor, + method, + path, + content=b"", + access_token=None, + request=SynapseRequest, + shorthand=True, ): """ Make a web request using the given method and path, feed it the @@ -138,7 +154,7 @@ def make_request( content = content.encode('utf8') site = FakeSite() - channel = FakeChannel() + channel = FakeChannel(reactor) req = request(site, channel) req.process = lambda: b"" diff --git a/tests/test_mau.py b/tests/test_mau.py index 5d387851c5..0afdeb0818 100644 --- a/tests/test_mau.py +++ b/tests/test_mau.py @@ -21,30 +21,20 @@ from mock import Mock, NonCallableMock from synapse.api.constants import LoginType from synapse.api.errors import Codes, HttpResponseException, SynapseError -from synapse.http.server import JsonResource from synapse.rest.client.v2_alpha import register, sync -from synapse.util import Clock from tests import unittest -from tests.server import ( - ThreadedMemoryReactorClock, - make_request, - render, - setup_test_homeserver, -) -class TestMauLimit(unittest.TestCase): - def setUp(self): - self.reactor = ThreadedMemoryReactorClock() - self.clock = Clock(self.reactor) +class TestMauLimit(unittest.HomeserverTestCase): - self.hs = setup_test_homeserver( - self.addCleanup, + servlets = [register.register_servlets, sync.register_servlets] + + def make_homeserver(self, reactor, clock): + + self.hs = self.setup_test_homeserver( "red", http_client=None, - clock=self.clock, - reactor=self.reactor, federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) @@ -63,10 +53,7 @@ class TestMauLimit(unittest.TestCase): self.hs.config.server_notices_mxid_display_name = None self.hs.config.server_notices_mxid_avatar_url = None self.hs.config.server_notices_room_name = "Test Server Notice Room" - - self.resource = JsonResource(self.hs) - register.register_servlets(self.hs, self.resource) - sync.register_servlets(self.hs, self.resource) + return self.hs def test_simple_deny_mau(self): # Create and sync so that the MAU counts get updated @@ -193,8 +180,8 @@ class TestMauLimit(unittest.TestCase): } ) - request, channel = make_request("POST", "/register", request_data) - render(request, self.resource, self.reactor) + request, channel = self.make_request("POST", "/register", request_data) + self.render(request) if channel.code != 
200: raise HttpResponseException( @@ -206,10 +193,10 @@ class TestMauLimit(unittest.TestCase): return access_token def do_sync_for_user(self, token): - request, channel = make_request( + request, channel = self.make_request( "GET", "/sync", access_token=token ) - render(request, self.resource, self.reactor) + self.render(request) if channel.code != 200: raise HttpResponseException( diff --git a/tests/test_server.py b/tests/test_server.py index 4045fdadc3..f0e6291b7e 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -57,7 +57,9 @@ class JsonResourceTests(unittest.TestCase): "GET", [re.compile("^/_matrix/foo/(?P[^/]*)$")], _callback ) - request, channel = make_request(b"GET", b"/_matrix/foo/%E2%98%83?a=%E2%98%83") + request, channel = make_request( + self.reactor, b"GET", b"/_matrix/foo/%E2%98%83?a=%E2%98%83" + ) render(request, res, self.reactor) self.assertEqual(request.args, {b'a': [u"\N{SNOWMAN}".encode('utf8')]}) @@ -75,7 +77,7 @@ class JsonResourceTests(unittest.TestCase): res = JsonResource(self.homeserver) res.register_paths("GET", [re.compile("^/_matrix/foo$")], _callback) - request, channel = make_request(b"GET", b"/_matrix/foo") + request, channel = make_request(self.reactor, b"GET", b"/_matrix/foo") render(request, res, self.reactor) self.assertEqual(channel.result["code"], b'500') @@ -98,7 +100,7 @@ class JsonResourceTests(unittest.TestCase): res = JsonResource(self.homeserver) res.register_paths("GET", [re.compile("^/_matrix/foo$")], _callback) - request, channel = make_request(b"GET", b"/_matrix/foo") + request, channel = make_request(self.reactor, b"GET", b"/_matrix/foo") render(request, res, self.reactor) self.assertEqual(channel.result["code"], b'500') @@ -115,7 +117,7 @@ class JsonResourceTests(unittest.TestCase): res = JsonResource(self.homeserver) res.register_paths("GET", [re.compile("^/_matrix/foo$")], _callback) - request, channel = make_request(b"GET", b"/_matrix/foo") + request, channel = make_request(self.reactor, b"GET", b"/_matrix/foo") render(request, res, self.reactor) self.assertEqual(channel.result["code"], b'403') @@ -136,7 +138,7 @@ class JsonResourceTests(unittest.TestCase): res = JsonResource(self.homeserver) res.register_paths("GET", [re.compile("^/_matrix/foo$")], _callback) - request, channel = make_request(b"GET", b"/_matrix/foobar") + request, channel = make_request(self.reactor, b"GET", b"/_matrix/foobar") render(request, res, self.reactor) self.assertEqual(channel.result["code"], b'400') diff --git a/tests/test_terms_auth.py b/tests/test_terms_auth.py index 0b71c6feb9..9ecc3ef14f 100644 --- a/tests/test_terms_auth.py +++ b/tests/test_terms_auth.py @@ -23,7 +23,6 @@ from synapse.rest.client.v2_alpha.register import register_servlets from synapse.util import Clock from tests import unittest -from tests.server import make_request class TermsTestCase(unittest.HomeserverTestCase): @@ -92,7 +91,7 @@ class TermsTestCase(unittest.HomeserverTestCase): self.registration_handler.check_username = Mock(return_value=True) - request, channel = make_request(b"POST", self.url, request_data) + request, channel = self.make_request(b"POST", self.url, request_data) self.render(request) # We don't bother checking that the response is correct - we'll leave that to @@ -110,7 +109,7 @@ class TermsTestCase(unittest.HomeserverTestCase): }, } ) - request, channel = make_request(b"POST", self.url, request_data) + request, channel = self.make_request(b"POST", self.url, request_data) self.render(request) # We're interested in getting a response that looks like a 
successful diff --git a/tests/unittest.py b/tests/unittest.py index 5e35c943d7..a9ce57da9a 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -189,11 +189,11 @@ class HomeserverTestCase(TestCase): for servlet in self.servlets: servlet(self.hs, self.resource) - if hasattr(self, "user_id"): - from tests.rest.client.v1.utils import RestHelper + from tests.rest.client.v1.utils import RestHelper - self.helper = RestHelper(self.hs, self.resource, self.user_id) + self.helper = RestHelper(self.hs, self.resource, getattr(self, "user_id", None)) + if hasattr(self, "user_id"): if self.hijack_auth: def get_user_by_access_token(token=None, allow_guest=False): @@ -285,7 +285,9 @@ class HomeserverTestCase(TestCase): if isinstance(content, dict): content = json.dumps(content).encode('utf8') - return make_request(method, path, content, access_token, request, shorthand) + return make_request( + self.reactor, method, path, content, access_token, request, shorthand + ) def render(self, request): """ -- cgit 1.5.1 From b3708830b847245a5d559a099fcaf738250b7cbe Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 8 Nov 2018 01:37:43 +1100 Subject: Fix URL preview bugs (type error when loading cache from db, content-type including quotes) (#4157) --- changelog.d/4157.bugfix | 1 + synapse/http/server.py | 8 +- synapse/rest/media/v1/preview_url_resource.py | 22 +++- tests/rest/media/v1/test_url_preview.py | 164 ++++++++++++++++++++++++++ tests/server.py | 2 + 5 files changed, 187 insertions(+), 10 deletions(-) create mode 100644 changelog.d/4157.bugfix create mode 100644 tests/rest/media/v1/test_url_preview.py (limited to 'tests/server.py') diff --git a/changelog.d/4157.bugfix b/changelog.d/4157.bugfix new file mode 100644 index 0000000000..265514c3af --- /dev/null +++ b/changelog.d/4157.bugfix @@ -0,0 +1 @@ +Loading URL previews from the DB cache on Postgres will no longer cause Unicode type errors when responding to the request, and URL previews will no longer fail if the remote server returns a Content-Type header with the chartype in quotes. \ No newline at end of file diff --git a/synapse/http/server.py b/synapse/http/server.py index b4b25cab19..6a427d96a6 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -468,13 +468,13 @@ def set_cors_headers(request): Args: request (twisted.web.http.Request): The http request to add CORs to. """ - request.setHeader("Access-Control-Allow-Origin", "*") + request.setHeader(b"Access-Control-Allow-Origin", b"*") request.setHeader( - "Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS" + b"Access-Control-Allow-Methods", b"GET, POST, PUT, DELETE, OPTIONS" ) request.setHeader( - "Access-Control-Allow-Headers", - "Origin, X-Requested-With, Content-Type, Accept, Authorization" + b"Access-Control-Allow-Headers", + b"Origin, X-Requested-With, Content-Type, Accept, Authorization" ) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 1a7bfd6b56..91d1dafe64 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ import cgi import datetime import errno @@ -24,6 +25,7 @@ import shutil import sys import traceback +import six from six import string_types from six.moves import urllib_parse as urlparse @@ -98,7 +100,7 @@ class PreviewUrlResource(Resource): # XXX: if get_user_by_req fails, what should we do in an async render? requester = yield self.auth.get_user_by_req(request) url = parse_string(request, "url") - if "ts" in request.args: + if b"ts" in request.args: ts = parse_integer(request, "ts") else: ts = self.clock.time_msec() @@ -180,7 +182,12 @@ class PreviewUrlResource(Resource): cache_result["expires_ts"] > ts and cache_result["response_code"] / 100 == 2 ): - defer.returnValue(cache_result["og"]) + # It may be stored as text in the database, not as bytes (such as + # PostgreSQL). If so, encode it back before handing it on. + og = cache_result["og"] + if isinstance(og, six.text_type): + og = og.encode('utf8') + defer.returnValue(og) return media_info = yield self._download_url(url, user) @@ -213,14 +220,17 @@ class PreviewUrlResource(Resource): elif _is_html(media_info['media_type']): # TODO: somehow stop a big HTML tree from exploding synapse's RAM - file = open(media_info['filename']) - body = file.read() - file.close() + with open(media_info['filename'], 'rb') as file: + body = file.read() # clobber the encoding from the content-type, or default to utf-8 # XXX: this overrides any or XML charset headers in the body # which may pose problems, but so far seems to work okay. - match = re.match(r'.*; *charset=(.*?)(;|$)', media_info['media_type'], re.I) + match = re.match( + r'.*; *charset="?(.*?)"?(;|$)', + media_info['media_type'], + re.I + ) encoding = match.group(1) if match else "utf-8" og = decode_and_calc_og(body, media_info['uri'], encoding) diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py new file mode 100644 index 0000000000..29579cf091 --- /dev/null +++ b/tests/rest/media/v1/test_url_preview.py @@ -0,0 +1,164 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +from mock import Mock + +from twisted.internet.defer import Deferred + +from synapse.config.repository import MediaStorageProviderConfig +from synapse.util.module_loader import load_module + +from tests import unittest + + +class URLPreviewTests(unittest.HomeserverTestCase): + + hijack_auth = True + user_id = "@test:user" + + def make_homeserver(self, reactor, clock): + + self.storage_path = self.mktemp() + os.mkdir(self.storage_path) + + config = self.default_config() + config.url_preview_enabled = True + config.max_spider_size = 9999999 + config.url_preview_url_blacklist = [] + config.media_store_path = self.storage_path + + provider_config = { + "module": "synapse.rest.media.v1.storage_provider.FileStorageProviderBackend", + "store_local": True, + "store_synchronous": False, + "store_remote": True, + "config": {"directory": self.storage_path}, + } + + loaded = list(load_module(provider_config)) + [ + MediaStorageProviderConfig(False, False, False) + ] + + config.media_storage_providers = [loaded] + + hs = self.setup_test_homeserver(config=config) + + return hs + + def prepare(self, reactor, clock, hs): + + self.fetches = [] + + def get_file(url, output_stream, max_size): + """ + Returns tuple[int,dict,str,int] of file length, response headers, + absolute URI, and response code. + """ + + def write_to(r): + data, response = r + output_stream.write(data) + return response + + d = Deferred() + d.addCallback(write_to) + self.fetches.append((d, url)) + return d + + client = Mock() + client.get_file = get_file + + self.media_repo = hs.get_media_repository_resource() + preview_url = self.media_repo.children[b'preview_url'] + preview_url.client = client + self.preview_url = preview_url + + def test_cache_returns_correct_type(self): + + request, channel = self.make_request( + "GET", "url_preview?url=matrix.org", shorthand=False + ) + request.render(self.preview_url) + self.pump() + + # We've made one fetch + self.assertEqual(len(self.fetches), 1) + + end_content = ( + b'' + b'' + b'' + b'' + ) + + self.fetches[0][0].callback( + ( + end_content, + ( + len(end_content), + { + b"Content-Length": [b"%d" % (len(end_content))], + b"Content-Type": [b'text/html; charset="utf8"'], + }, + "https://example.com", + 200, + ), + ) + ) + + self.pump() + self.assertEqual(channel.code, 200) + self.assertEqual( + channel.json_body, {"og:title": "~matrix~", "og:description": "hi"} + ) + + # Check the cache returns the correct response + request, channel = self.make_request( + "GET", "url_preview?url=matrix.org", shorthand=False + ) + request.render(self.preview_url) + self.pump() + + # Only one fetch, still, since we'll lean on the cache + self.assertEqual(len(self.fetches), 1) + + # Check the cache response has the same content + self.assertEqual(channel.code, 200) + self.assertEqual( + channel.json_body, {"og:title": "~matrix~", "og:description": "hi"} + ) + + # Clear the in-memory cache + self.assertIn("matrix.org", self.preview_url._cache) + self.preview_url._cache.pop("matrix.org") + self.assertNotIn("matrix.org", self.preview_url._cache) + + # Check the database cache returns the correct response + request, channel = self.make_request( + "GET", "url_preview?url=matrix.org", shorthand=False + ) + request.render(self.preview_url) + self.pump() + + # Only one fetch, still, since we'll lean on the cache + self.assertEqual(len(self.fetches), 1) + + # Check the cache response has the same content + self.assertEqual(channel.code, 200) + self.assertEqual( + channel.json_body, {"og:title": "~matrix~", 
"og:description": "hi"} + ) diff --git a/tests/server.py b/tests/server.py index 984cfe26d4..7919a1f124 100644 --- a/tests/server.py +++ b/tests/server.py @@ -57,6 +57,8 @@ class FakeChannel(object): self.result["headers"] = headers def write(self, content): + assert isinstance(content, bytes), "Should be bytes! " + repr(content) + if "body" not in self.result: self.result["body"] = b"" -- cgit 1.5.1 From 8b1affe7d5ca991a1924bc054dfd64ddb3d7b488 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 15 Nov 2018 15:55:58 -0600 Subject: Fix Content-Disposition in media repository (#4176) --- changelog.d/4176.bugfix | 1 + synapse/rest/media/v1/_base.py | 122 +++++++++++++++++----- synapse/rest/media/v1/media_repository.py | 48 ++------- synapse/rest/media/v1/preview_url_resource.py | 30 +----- tests/rest/media/v1/test_media_storage.py | 145 ++++++++++++++++++++++++++ tests/server.py | 15 +++ 6 files changed, 271 insertions(+), 90 deletions(-) create mode 100644 changelog.d/4176.bugfix (limited to 'tests/server.py') diff --git a/changelog.d/4176.bugfix b/changelog.d/4176.bugfix new file mode 100644 index 0000000000..3846f8a27b --- /dev/null +++ b/changelog.d/4176.bugfix @@ -0,0 +1 @@ +The media repository now no longer fails to decode UTF-8 filenames when downloading remote media. diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 76e479afa3..efe42a429d 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -16,6 +16,7 @@ import logging import os +from six import PY3 from six.moves import urllib from twisted.internet import defer @@ -48,26 +49,21 @@ def parse_media_id(request): return server_name, media_id, file_name except Exception: raise SynapseError( - 404, - "Invalid media id token %r" % (request.postpath,), - Codes.UNKNOWN, + 404, "Invalid media id token %r" % (request.postpath,), Codes.UNKNOWN ) def respond_404(request): respond_with_json( - request, 404, - cs_error( - "Not found %r" % (request.postpath,), - code=Codes.NOT_FOUND, - ), - send_cors=True + request, + 404, + cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND), + send_cors=True, ) @defer.inlineCallbacks -def respond_with_file(request, media_type, file_path, - file_size=None, upload_name=None): +def respond_with_file(request, media_type, file_path, file_size=None, upload_name=None): logger.debug("Responding with %r", file_path) if os.path.isfile(file_path): @@ -97,31 +93,26 @@ def add_file_headers(request, media_type, file_size, upload_name): file_size (int): Size in bytes of the media, if known. upload_name (str): The name of the requested file, if any. """ + def _quote(x): return urllib.parse.quote(x.encode("utf-8")) request.setHeader(b"Content-Type", media_type.encode("UTF-8")) if upload_name: if is_ascii(upload_name): - disposition = ("inline; filename=%s" % (_quote(upload_name),)).encode("ascii") + disposition = "inline; filename=%s" % (_quote(upload_name),) else: - disposition = ( - "inline; filename*=utf-8''%s" % (_quote(upload_name),)).encode("ascii") + disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),) - request.setHeader(b"Content-Disposition", disposition) + request.setHeader(b"Content-Disposition", disposition.encode('ascii')) # cache for at least a day. # XXX: we might want to turn this off for data we don't want to # recommend caching as it's sensitive or private - or at least # select private. 
don't bother setting Expires as all our # clients are smart enough to be happy with Cache-Control - request.setHeader( - b"Cache-Control", b"public,max-age=86400,s-maxage=86400" - ) - - request.setHeader( - b"Content-Length", b"%d" % (file_size,) - ) + request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400") + request.setHeader(b"Content-Length", b"%d" % (file_size,)) @defer.inlineCallbacks @@ -153,6 +144,7 @@ class Responder(object): Responder is a context manager which *must* be used, so that any resources held can be cleaned up. """ + def write_to_consumer(self, consumer): """Stream response into consumer @@ -186,9 +178,18 @@ class FileInfo(object): thumbnail_method (str) thumbnail_type (str): Content type of thumbnail, e.g. image/png """ - def __init__(self, server_name, file_id, url_cache=False, - thumbnail=False, thumbnail_width=None, thumbnail_height=None, - thumbnail_method=None, thumbnail_type=None): + + def __init__( + self, + server_name, + file_id, + url_cache=False, + thumbnail=False, + thumbnail_width=None, + thumbnail_height=None, + thumbnail_method=None, + thumbnail_type=None, + ): self.server_name = server_name self.file_id = file_id self.url_cache = url_cache @@ -197,3 +198,74 @@ class FileInfo(object): self.thumbnail_height = thumbnail_height self.thumbnail_method = thumbnail_method self.thumbnail_type = thumbnail_type + + +def get_filename_from_headers(headers): + """ + Get the filename of the downloaded file by inspecting the + Content-Disposition HTTP header. + + Args: + headers (twisted.web.http_headers.Headers): The HTTP + request headers. + + Returns: + A Unicode string of the filename, or None. + """ + content_disposition = headers.get(b"Content-Disposition", [b'']) + + # No header, bail out. + if not content_disposition[0]: + return + + # dict of unicode: bytes, corresponding to the key value sections of the + # Content-Disposition header. + params = {} + parts = content_disposition[0].split(b";") + for i in parts: + # Split into key-value pairs, if able + # We don't care about things like `inline`, so throw it out + if b"=" not in i: + continue + + key, value = i.strip().split(b"=") + params[key.decode('ascii')] = value + + upload_name = None + + # First check if there is a valid UTF-8 filename + upload_name_utf8 = params.get("filename*", None) + if upload_name_utf8: + if upload_name_utf8.lower().startswith(b"utf-8''"): + upload_name_utf8 = upload_name_utf8[7:] + # We have a filename*= section. This MUST be ASCII, and any UTF-8 + # bytes are %-quoted. + if PY3: + try: + # Once it is decoded, we can then unquote the %-encoded + # parts strictly into a unicode string. + upload_name = urllib.parse.unquote( + upload_name_utf8.decode('ascii'), errors="strict" + ) + except UnicodeDecodeError: + # Incorrect UTF-8. + pass + else: + # On Python 2, we first unquote the %-encoded parts and then + # decode it strictly using UTF-8. + try: + upload_name = urllib.parse.unquote(upload_name_utf8).decode('utf8') + except UnicodeDecodeError: + pass + + # If there isn't check for an ascii name. + if not upload_name: + upload_name_ascii = params.get("filename", None) + if upload_name_ascii and is_ascii(upload_name_ascii): + # Make sure there's no %-quoted bytes. If there is, reject it as + # non-valid ASCII. + if b"%" not in upload_name_ascii: + upload_name = upload_name_ascii.decode('ascii') + + # This may be None here, indicating we did not find a matching name. 
+ return upload_name diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index d6c5f07af0..e117836e9a 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -14,14 +14,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cgi import errno import logging import os import shutil -from six import PY3, iteritems -from six.moves.urllib import parse as urlparse +from six import iteritems import twisted.internet.error import twisted.web.http @@ -34,14 +32,18 @@ from synapse.api.errors import ( NotFoundError, SynapseError, ) -from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util import logcontext from synapse.util.async_helpers import Linearizer from synapse.util.retryutils import NotRetryingDestination -from synapse.util.stringutils import is_ascii, random_string +from synapse.util.stringutils import random_string -from ._base import FileInfo, respond_404, respond_with_responder +from ._base import ( + FileInfo, + get_filename_from_headers, + respond_404, + respond_with_responder, +) from .config_resource import MediaConfigResource from .download_resource import DownloadResource from .filepath import MediaFilePaths @@ -62,7 +64,7 @@ class MediaRepository(object): def __init__(self, hs): self.hs = hs self.auth = hs.get_auth() - self.client = MatrixFederationHttpClient(hs) + self.client = hs.get_http_client() self.clock = hs.get_clock() self.server_name = hs.hostname self.store = hs.get_datastore() @@ -397,39 +399,9 @@ class MediaRepository(object): yield finish() media_type = headers[b"Content-Type"][0].decode('ascii') - + upload_name = get_filename_from_headers(headers) time_now_ms = self.clock.time_msec() - content_disposition = headers.get(b"Content-Disposition", None) - if content_disposition: - _, params = cgi.parse_header(content_disposition[0].decode('ascii'),) - upload_name = None - - # First check if there is a valid UTF-8 filename - upload_name_utf8 = params.get("filename*", None) - if upload_name_utf8: - if upload_name_utf8.lower().startswith("utf-8''"): - upload_name = upload_name_utf8[7:] - - # If there isn't check for an ascii name. - if not upload_name: - upload_name_ascii = params.get("filename", None) - if upload_name_ascii and is_ascii(upload_name_ascii): - upload_name = upload_name_ascii - - if upload_name: - if PY3: - upload_name = urlparse.unquote(upload_name) - else: - upload_name = urlparse.unquote(upload_name.encode('ascii')) - try: - if isinstance(upload_name, bytes): - upload_name = upload_name.decode("utf-8") - except UnicodeDecodeError: - upload_name = None - else: - upload_name = None - logger.info("Stored remote media in file %r", fname) yield self.store.store_cached_remote_media( diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 9b15699e4d..d0ecf241b6 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import cgi import datetime import errno import fnmatch @@ -44,10 +43,11 @@ from synapse.http.server import ( ) from synapse.http.servlet import parse_integer, parse_string from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.rest.media.v1._base import get_filename_from_headers from synapse.util.async_helpers import ObservableDeferred from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.logcontext import make_deferred_yieldable, run_in_background -from synapse.util.stringutils import is_ascii, random_string +from synapse.util.stringutils import random_string from ._base import FileInfo @@ -336,31 +336,7 @@ class PreviewUrlResource(Resource): media_type = "application/octet-stream" time_now_ms = self.clock.time_msec() - content_disposition = headers.get(b"Content-Disposition", None) - if content_disposition: - _, params = cgi.parse_header(content_disposition[0],) - download_name = None - - # First check if there is a valid UTF-8 filename - download_name_utf8 = params.get("filename*", None) - if download_name_utf8: - if download_name_utf8.lower().startswith("utf-8''"): - download_name = download_name_utf8[7:] - - # If there isn't check for an ascii name. - if not download_name: - download_name_ascii = params.get("filename", None) - if download_name_ascii and is_ascii(download_name_ascii): - download_name = download_name_ascii - - if download_name: - download_name = urlparse.unquote(download_name) - try: - download_name = download_name.decode("utf-8") - except UnicodeDecodeError: - download_name = None - else: - download_name = None + download_name = get_filename_from_headers(headers) yield self.store.store_local_media( media_id=file_id, diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index a86901c2d8..fd131e3454 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -17,15 +17,20 @@ import os import shutil import tempfile +from binascii import unhexlify from mock import Mock +from six.moves.urllib import parse from twisted.internet import defer, reactor +from twisted.internet.defer import Deferred +from synapse.config.repository import MediaStorageProviderConfig from synapse.rest.media.v1._base import FileInfo from synapse.rest.media.v1.filepath import MediaFilePaths from synapse.rest.media.v1.media_storage import MediaStorage from synapse.rest.media.v1.storage_provider import FileStorageProviderBackend +from synapse.util.module_loader import load_module from tests import unittest @@ -83,3 +88,143 @@ class MediaStorageTests(unittest.TestCase): body = f.read() self.assertEqual(test_body, body) + + +class MediaRepoTests(unittest.HomeserverTestCase): + + hijack_auth = True + user_id = "@test:user" + + def make_homeserver(self, reactor, clock): + + self.fetches = [] + + def get_file(destination, path, output_stream, args=None, max_size=None): + """ + Returns tuple[int,dict,str,int] of file length, response headers, + absolute URI, and response code. 
+ """ + + def write_to(r): + data, response = r + output_stream.write(data) + return response + + d = Deferred() + d.addCallback(write_to) + self.fetches.append((d, destination, path, args)) + return d + + client = Mock() + client.get_file = get_file + + self.storage_path = self.mktemp() + os.mkdir(self.storage_path) + + config = self.default_config() + config.media_store_path = self.storage_path + config.thumbnail_requirements = {} + config.max_image_pixels = 2000000 + + provider_config = { + "module": "synapse.rest.media.v1.storage_provider.FileStorageProviderBackend", + "store_local": True, + "store_synchronous": False, + "store_remote": True, + "config": {"directory": self.storage_path}, + } + + loaded = list(load_module(provider_config)) + [ + MediaStorageProviderConfig(False, False, False) + ] + + config.media_storage_providers = [loaded] + + hs = self.setup_test_homeserver(config=config, http_client=client) + + return hs + + def prepare(self, reactor, clock, hs): + + self.media_repo = hs.get_media_repository_resource() + self.download_resource = self.media_repo.children[b'download'] + + # smol png + self.end_content = unhexlify( + b"89504e470d0a1a0a0000000d4948445200000001000000010806" + b"0000001f15c4890000000a49444154789c63000100000500010d" + b"0a2db40000000049454e44ae426082" + ) + + def _req(self, content_disposition): + + request, channel = self.make_request( + "GET", "example.com/12345", shorthand=False + ) + request.render(self.download_resource) + self.pump() + + # We've made one fetch, to example.com, using the media URL, and asking + # the other server not to do a remote fetch + self.assertEqual(len(self.fetches), 1) + self.assertEqual(self.fetches[0][1], "example.com") + self.assertEqual( + self.fetches[0][2], "/_matrix/media/v1/download/example.com/12345" + ) + self.assertEqual(self.fetches[0][3], {"allow_remote": "false"}) + + headers = { + b"Content-Length": [b"%d" % (len(self.end_content))], + b"Content-Type": [b'image/png'], + } + if content_disposition: + headers[b"Content-Disposition"] = [content_disposition] + + self.fetches[0][0].callback( + (self.end_content, (len(self.end_content), headers)) + ) + + self.pump() + self.assertEqual(channel.code, 200) + + return channel + + def test_disposition_filename_ascii(self): + """ + If the filename is filename= then Synapse will decode it as an + ASCII string, and use filename= in the response. + """ + channel = self._req(b"inline; filename=out.png") + + headers = channel.headers + self.assertEqual(headers.getRawHeaders(b"Content-Type"), [b"image/png"]) + self.assertEqual( + headers.getRawHeaders(b"Content-Disposition"), [b"inline; filename=out.png"] + ) + + def test_disposition_filenamestar_utf8escaped(self): + """ + If the filename is filename=*utf8'' then Synapse will + correctly decode it as the UTF-8 string, and use filename* in the + response. + """ + filename = parse.quote(u"\u2603".encode('utf8')).encode('ascii') + channel = self._req(b"inline; filename*=utf-8''" + filename + b".png") + + headers = channel.headers + self.assertEqual(headers.getRawHeaders(b"Content-Type"), [b"image/png"]) + self.assertEqual( + headers.getRawHeaders(b"Content-Disposition"), + [b"inline; filename*=utf-8''" + filename + b".png"], + ) + + def test_disposition_none(self): + """ + If there is no filename, one isn't passed on in the Content-Disposition + of the request. 
+ """ + channel = self._req(None) + + headers = channel.headers + self.assertEqual(headers.getRawHeaders(b"Content-Type"), [b"image/png"]) + self.assertEqual(headers.getRawHeaders(b"Content-Disposition"), None) diff --git a/tests/server.py b/tests/server.py index 7919a1f124..ceec2f2d4e 100644 --- a/tests/server.py +++ b/tests/server.py @@ -14,6 +14,8 @@ from twisted.internet.error import DNSLookupError from twisted.internet.interfaces import IReactorPluggableNameResolver from twisted.python.failure import Failure from twisted.test.proto_helpers import MemoryReactorClock +from twisted.web.http import unquote +from twisted.web.http_headers import Headers from synapse.http.site import SynapseRequest from synapse.util import Clock @@ -50,6 +52,15 @@ class FakeChannel(object): raise Exception("No result yet.") return int(self.result["code"]) + @property + def headers(self): + if not self.result: + raise Exception("No result yet.") + h = Headers() + for i in self.result["headers"]: + h.addRawHeader(*i) + return h + def writeHeaders(self, version, code, reason, headers): self.result["version"] = version self.result["code"] = code @@ -152,6 +163,9 @@ def make_request( path = b"/_matrix/client/r0/" + path path = path.replace(b"//", b"/") + if not path.startswith(b"/"): + path = b"/" + path + if isinstance(content, text_type): content = content.encode('utf8') @@ -161,6 +175,7 @@ def make_request( req = request(site, channel) req.process = lambda: b"" req.content = BytesIO(content) + req.postpath = list(map(unquote, path[1:].split(b'/'))) if access_token: req.requestHeaders.addRawHeader( -- cgit 1.5.1 From ea6abf6724f4572a709047034fe2a672641068b9 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Sat, 22 Dec 2018 01:56:13 +1100 Subject: Fix IP URL previews on Python 3 (#4215) --- .coveragerc | 1 - .gitignore | 2 - changelog.d/4215.misc | 1 + synapse/http/client.py | 377 +++++++++++++++-------- synapse/http/endpoint.py | 35 --- synapse/rest/media/v1/preview_url_resource.py | 14 +- tests/rest/media/v1/test_url_preview.py | 424 ++++++++++++++++++++------ tests/server.py | 8 + 8 files changed, 590 insertions(+), 272 deletions(-) create mode 100644 changelog.d/4215.misc (limited to 'tests/server.py') diff --git a/.coveragerc b/.coveragerc index ca333961f3..9873a30738 100644 --- a/.coveragerc +++ b/.coveragerc @@ -9,4 +9,3 @@ source= [report] precision = 2 -ignore_errors = True diff --git a/.gitignore b/.gitignore index 1b632646bb..d739595c3a 100644 --- a/.gitignore +++ b/.gitignore @@ -59,7 +59,5 @@ env/ .vscode/ .ropeproject/ - *.deb - /debs diff --git a/changelog.d/4215.misc b/changelog.d/4215.misc new file mode 100644 index 0000000000..bb90594836 --- /dev/null +++ b/changelog.d/4215.misc @@ -0,0 +1 @@ +Getting URL previews of IP addresses no longer fails on Python 3. 
diff --git a/synapse/http/client.py b/synapse/http/client.py index 3d05f83b8c..afcf698b29 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -21,28 +21,25 @@ from six.moves import urllib import treq from canonicaljson import encode_canonical_json, json +from netaddr import IPAddress from prometheus_client import Counter +from zope.interface import implementer, provider from OpenSSL import SSL from OpenSSL.SSL import VERIFY_NONE -from twisted.internet import defer, protocol, reactor, ssl -from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS -from twisted.web._newclient import ResponseDone -from twisted.web.client import ( - Agent, - BrowserLikeRedirectAgent, - ContentDecoderAgent, - GzipDecoder, - HTTPConnectionPool, - PartialDownloadError, - readBody, +from twisted.internet import defer, protocol, ssl +from twisted.internet.interfaces import ( + IReactorPluggableNameResolver, + IResolutionReceiver, ) +from twisted.python.failure import Failure +from twisted.web._newclient import ResponseDone +from twisted.web.client import Agent, HTTPConnectionPool, PartialDownloadError, readBody from twisted.web.http import PotentialDataLoss from twisted.web.http_headers import Headers from synapse.api.errors import Codes, HttpResponseException, SynapseError from synapse.http import cancelled_to_request_timed_out_error, redact_uri -from synapse.http.endpoint import SpiderEndpoint from synapse.util.async_helpers import timeout_deferred from synapse.util.caches import CACHE_SIZE_FACTOR from synapse.util.logcontext import make_deferred_yieldable @@ -50,8 +47,125 @@ from synapse.util.logcontext import make_deferred_yieldable logger = logging.getLogger(__name__) outgoing_requests_counter = Counter("synapse_http_client_requests", "", ["method"]) -incoming_responses_counter = Counter("synapse_http_client_responses", "", - ["method", "code"]) +incoming_responses_counter = Counter( + "synapse_http_client_responses", "", ["method", "code"] +) + + +def check_against_blacklist(ip_address, ip_whitelist, ip_blacklist): + """ + Args: + ip_address (netaddr.IPAddress) + ip_whitelist (netaddr.IPSet) + ip_blacklist (netaddr.IPSet) + """ + if ip_address in ip_blacklist: + if ip_whitelist is None or ip_address not in ip_whitelist: + return True + return False + + +class IPBlacklistingResolver(object): + """ + A proxy for reactor.nameResolver which only produces non-blacklisted IP + addresses, preventing DNS rebinding attacks on URL preview. 
+ """ + + def __init__(self, reactor, ip_whitelist, ip_blacklist): + """ + Args: + reactor (twisted.internet.reactor) + ip_whitelist (netaddr.IPSet) + ip_blacklist (netaddr.IPSet) + """ + self._reactor = reactor + self._ip_whitelist = ip_whitelist + self._ip_blacklist = ip_blacklist + + def resolveHostName(self, recv, hostname, portNumber=0): + + r = recv() + d = defer.Deferred() + addresses = [] + + @provider(IResolutionReceiver) + class EndpointReceiver(object): + @staticmethod + def resolutionBegan(resolutionInProgress): + pass + + @staticmethod + def addressResolved(address): + ip_address = IPAddress(address.host) + + if check_against_blacklist( + ip_address, self._ip_whitelist, self._ip_blacklist + ): + logger.info( + "Dropped %s from DNS resolution to %s" % (ip_address, hostname) + ) + raise SynapseError(403, "IP address blocked by IP blacklist entry") + + addresses.append(address) + + @staticmethod + def resolutionComplete(): + d.callback(addresses) + + self._reactor.nameResolver.resolveHostName( + EndpointReceiver, hostname, portNumber=portNumber + ) + + def _callback(addrs): + r.resolutionBegan(None) + for i in addrs: + r.addressResolved(i) + r.resolutionComplete() + + d.addCallback(_callback) + + return r + + +class BlacklistingAgentWrapper(Agent): + """ + An Agent wrapper which will prevent access to IP addresses being accessed + directly (without an IP address lookup). + """ + + def __init__(self, agent, reactor, ip_whitelist=None, ip_blacklist=None): + """ + Args: + agent (twisted.web.client.Agent): The Agent to wrap. + reactor (twisted.internet.reactor) + ip_whitelist (netaddr.IPSet) + ip_blacklist (netaddr.IPSet) + """ + self._agent = agent + self._ip_whitelist = ip_whitelist + self._ip_blacklist = ip_blacklist + + def request(self, method, uri, headers=None, bodyProducer=None): + h = urllib.parse.urlparse(uri.decode('ascii')) + + try: + ip_address = IPAddress(h.hostname) + + if check_against_blacklist( + ip_address, self._ip_whitelist, self._ip_blacklist + ): + logger.info( + "Blocking access to %s because of blacklist" % (ip_address,) + ) + e = SynapseError(403, "IP address blocked by IP blacklist entry") + return defer.fail(Failure(e)) + except Exception: + # Not an IP + pass + + return self._agent.request( + method, uri, headers=headers, bodyProducer=bodyProducer + ) class SimpleHttpClient(object): @@ -59,14 +173,54 @@ class SimpleHttpClient(object): A simple, no-frills HTTP client with methods that wrap up common ways of using HTTP in Matrix """ - def __init__(self, hs): + + def __init__(self, hs, treq_args={}, ip_whitelist=None, ip_blacklist=None): + """ + Args: + hs (synapse.server.HomeServer) + treq_args (dict): Extra keyword arguments to be given to treq.request. + ip_blacklist (netaddr.IPSet): The IP addresses that are blacklisted that + we may not request. + ip_whitelist (netaddr.IPSet): The whitelisted IP addresses, that we can + request if it were otherwise caught in a blacklist. 
+ """ self.hs = hs - pool = HTTPConnectionPool(reactor) + self._ip_whitelist = ip_whitelist + self._ip_blacklist = ip_blacklist + self._extra_treq_args = treq_args + + self.user_agent = hs.version_string + self.clock = hs.get_clock() + if hs.config.user_agent_suffix: + self.user_agent = "%s %s" % (self.user_agent, hs.config.user_agent_suffix) + + self.user_agent = self.user_agent.encode('ascii') + + if self._ip_blacklist: + real_reactor = hs.get_reactor() + # If we have an IP blacklist, we need to use a DNS resolver which + # filters out blacklisted IP addresses, to prevent DNS rebinding. + nameResolver = IPBlacklistingResolver( + real_reactor, self._ip_whitelist, self._ip_blacklist + ) + + @implementer(IReactorPluggableNameResolver) + class Reactor(object): + def __getattr__(_self, attr): + if attr == "nameResolver": + return nameResolver + else: + return getattr(real_reactor, attr) + + self.reactor = Reactor() + else: + self.reactor = hs.get_reactor() # the pusher makes lots of concurrent SSL connections to sygnal, and - # tends to do so in batches, so we need to allow the pool to keep lots - # of idle connections around. + # tends to do so in batches, so we need to allow the pool to keep + # lots of idle connections around. + pool = HTTPConnectionPool(self.reactor) pool.maxPersistentPerHost = max((100 * CACHE_SIZE_FACTOR, 5)) pool.cachedConnectionTimeout = 2 * 60 @@ -74,20 +228,35 @@ class SimpleHttpClient(object): # BrowserLikePolicyForHTTPS which will do regular cert validation # 'like a browser' self.agent = Agent( - reactor, + self.reactor, connectTimeout=15, - contextFactory=hs.get_http_client_context_factory(), + contextFactory=self.hs.get_http_client_context_factory(), pool=pool, ) - self.user_agent = hs.version_string - self.clock = hs.get_clock() - if hs.config.user_agent_suffix: - self.user_agent = "%s %s" % (self.user_agent, hs.config.user_agent_suffix,) - self.user_agent = self.user_agent.encode('ascii') + if self._ip_blacklist: + # If we have an IP blacklist, we then install the blacklisting Agent + # which prevents direct access to IP addresses, that are not caught + # by the DNS resolution. + self.agent = BlacklistingAgentWrapper( + self.agent, + self.reactor, + ip_whitelist=self._ip_whitelist, + ip_blacklist=self._ip_blacklist, + ) @defer.inlineCallbacks def request(self, method, uri, data=b'', headers=None): + """ + Args: + method (str): HTTP method to use. + uri (str): URI to query. + data (bytes): Data to send in the request body, if applicable. + headers (t.w.http_headers.Headers): Request headers. + + Raises: + SynapseError: If the IP is blacklisted. 
+ """ # A small wrapper around self.agent.request() so we can easily attach # counters to it outgoing_requests_counter.labels(method).inc() @@ -97,25 +266,34 @@ class SimpleHttpClient(object): try: request_deferred = treq.request( - method, uri, agent=self.agent, data=data, headers=headers + method, + uri, + agent=self.agent, + data=data, + headers=headers, + **self._extra_treq_args ) request_deferred = timeout_deferred( - request_deferred, 60, self.hs.get_reactor(), + request_deferred, + 60, + self.hs.get_reactor(), cancelled_to_request_timed_out_error, ) response = yield make_deferred_yieldable(request_deferred) incoming_responses_counter.labels(method, response.code).inc() logger.info( - "Received response to %s %s: %s", - method, redact_uri(uri), response.code + "Received response to %s %s: %s", method, redact_uri(uri), response.code ) defer.returnValue(response) except Exception as e: incoming_responses_counter.labels(method, "ERR").inc() logger.info( "Error sending request to %s %s: %s %s", - method, redact_uri(uri), type(e).__name__, e.args[0] + method, + redact_uri(uri), + type(e).__name__, + e.args[0], ) raise @@ -140,8 +318,9 @@ class SimpleHttpClient(object): # TODO: Do we ever want to log message contents? logger.debug("post_urlencoded_get_json args: %s", args) - query_bytes = urllib.parse.urlencode( - encode_urlencode_args(args), True).encode("utf8") + query_bytes = urllib.parse.urlencode(encode_urlencode_args(args), True).encode( + "utf8" + ) actual_headers = { b"Content-Type": [b"application/x-www-form-urlencoded"], @@ -151,10 +330,7 @@ class SimpleHttpClient(object): actual_headers.update(headers) response = yield self.request( - "POST", - uri, - headers=Headers(actual_headers), - data=query_bytes + "POST", uri, headers=Headers(actual_headers), data=query_bytes ) if 200 <= response.code < 300: @@ -193,10 +369,7 @@ class SimpleHttpClient(object): actual_headers.update(headers) response = yield self.request( - "POST", - uri, - headers=Headers(actual_headers), - data=json_str + "POST", uri, headers=Headers(actual_headers), data=json_str ) body = yield make_deferred_yieldable(readBody(response)) @@ -264,10 +437,7 @@ class SimpleHttpClient(object): actual_headers.update(headers) response = yield self.request( - "PUT", - uri, - headers=Headers(actual_headers), - data=json_str + "PUT", uri, headers=Headers(actual_headers), data=json_str ) body = yield make_deferred_yieldable(readBody(response)) @@ -299,17 +469,11 @@ class SimpleHttpClient(object): query_bytes = urllib.parse.urlencode(args, True) uri = "%s?%s" % (uri, query_bytes) - actual_headers = { - b"User-Agent": [self.user_agent], - } + actual_headers = {b"User-Agent": [self.user_agent]} if headers: actual_headers.update(headers) - response = yield self.request( - "GET", - uri, - headers=Headers(actual_headers), - ) + response = yield self.request("GET", uri, headers=Headers(actual_headers)) body = yield make_deferred_yieldable(readBody(response)) @@ -334,22 +498,18 @@ class SimpleHttpClient(object): headers, absolute URI of the response and HTTP response code. 
""" - actual_headers = { - b"User-Agent": [self.user_agent], - } + actual_headers = {b"User-Agent": [self.user_agent]} if headers: actual_headers.update(headers) - response = yield self.request( - "GET", - url, - headers=Headers(actual_headers), - ) + response = yield self.request("GET", url, headers=Headers(actual_headers)) resp_headers = dict(response.headers.getAllRawHeaders()) - if (b'Content-Length' in resp_headers and - int(resp_headers[b'Content-Length']) > max_size): + if ( + b'Content-Length' in resp_headers + and int(resp_headers[b'Content-Length'][0]) > max_size + ): logger.warn("Requested URL is too large > %r bytes" % (self.max_size,)) raise SynapseError( 502, @@ -359,26 +519,20 @@ class SimpleHttpClient(object): if response.code > 299: logger.warn("Got %d when downloading %s" % (response.code, url)) - raise SynapseError( - 502, - "Got error %d" % (response.code,), - Codes.UNKNOWN, - ) + raise SynapseError(502, "Got error %d" % (response.code,), Codes.UNKNOWN) # TODO: if our Content-Type is HTML or something, just read the first # N bytes into RAM rather than saving it all to disk only to read it # straight back in again try: - length = yield make_deferred_yieldable(_readBodyToFile( - response, output_stream, max_size, - )) + length = yield make_deferred_yieldable( + _readBodyToFile(response, output_stream, max_size) + ) except Exception as e: logger.exception("Failed to download body") raise SynapseError( - 502, - ("Failed to download remote body: %s" % e), - Codes.UNKNOWN, + 502, ("Failed to download remote body: %s" % e), Codes.UNKNOWN ) defer.returnValue( @@ -387,13 +541,14 @@ class SimpleHttpClient(object): resp_headers, response.request.absoluteURI.decode('ascii'), response.code, - ), + ) ) # XXX: FIXME: This is horribly copy-pasted from matrixfederationclient. # The two should be factored out. + class _ReadBodyToFileProtocol(protocol.Protocol): def __init__(self, stream, deferred, max_size): self.stream = stream @@ -405,11 +560,13 @@ class _ReadBodyToFileProtocol(protocol.Protocol): self.stream.write(data) self.length += len(data) if self.max_size is not None and self.length >= self.max_size: - self.deferred.errback(SynapseError( - 502, - "Requested file is too large > %r bytes" % (self.max_size,), - Codes.TOO_LARGE, - )) + self.deferred.errback( + SynapseError( + 502, + "Requested file is too large > %r bytes" % (self.max_size,), + Codes.TOO_LARGE, + ) + ) self.deferred = defer.Deferred() self.transport.loseConnection() @@ -427,6 +584,7 @@ class _ReadBodyToFileProtocol(protocol.Protocol): # XXX: FIXME: This is horribly copy-pasted from matrixfederationclient. # The two should be factored out. 
+ def _readBodyToFile(response, stream, max_size): d = defer.Deferred() response.deliverBody(_ReadBodyToFileProtocol(stream, d, max_size)) @@ -449,10 +607,12 @@ class CaptchaServerHttpClient(SimpleHttpClient): "POST", url, data=query_bytes, - headers=Headers({ - b"Content-Type": [b"application/x-www-form-urlencoded"], - b"User-Agent": [self.user_agent], - }) + headers=Headers( + { + b"Content-Type": [b"application/x-www-form-urlencoded"], + b"User-Agent": [self.user_agent], + } + ), ) try: @@ -463,57 +623,6 @@ class CaptchaServerHttpClient(SimpleHttpClient): defer.returnValue(e.response) -class SpiderEndpointFactory(object): - def __init__(self, hs): - self.blacklist = hs.config.url_preview_ip_range_blacklist - self.whitelist = hs.config.url_preview_ip_range_whitelist - self.policyForHTTPS = hs.get_http_client_context_factory() - - def endpointForURI(self, uri): - logger.info("Getting endpoint for %s", uri.toBytes()) - - if uri.scheme == b"http": - endpoint_factory = HostnameEndpoint - elif uri.scheme == b"https": - tlsCreator = self.policyForHTTPS.creatorForNetloc(uri.host, uri.port) - - def endpoint_factory(reactor, host, port, **kw): - return wrapClientTLS( - tlsCreator, - HostnameEndpoint(reactor, host, port, **kw)) - else: - logger.warn("Can't get endpoint for unrecognised scheme %s", uri.scheme) - return None - return SpiderEndpoint( - reactor, uri.host, uri.port, self.blacklist, self.whitelist, - endpoint=endpoint_factory, endpoint_kw_args=dict(timeout=15), - ) - - -class SpiderHttpClient(SimpleHttpClient): - """ - Separate HTTP client for spidering arbitrary URLs. - Special in that it follows retries and has a UA that looks - like a browser. - - used by the preview_url endpoint in the content repo. - """ - def __init__(self, hs): - SimpleHttpClient.__init__(self, hs) - # clobber the base class's agent and UA: - self.agent = ContentDecoderAgent( - BrowserLikeRedirectAgent( - Agent.usingEndpointFactory( - reactor, - SpiderEndpointFactory(hs) - ) - ), [(b'gzip', GzipDecoder)] - ) - # We could look like Chrome: - # self.user_agent = ("Mozilla/5.0 (%s) (KHTML, like Gecko) - # Chrome Safari" % hs.version_string) - - def encode_urlencode_args(args): return {k: encode_urlencode_arg(v) for k, v in args.items()} diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py index 91025037a3..f86a0b624e 100644 --- a/synapse/http/endpoint.py +++ b/synapse/http/endpoint.py @@ -218,41 +218,6 @@ class _WrappedConnection(object): return d -class SpiderEndpoint(object): - """An endpoint which refuses to connect to blacklisted IP addresses - Implements twisted.internet.interfaces.IStreamClientEndpoint. 
- """ - def __init__(self, reactor, host, port, blacklist, whitelist, - endpoint=HostnameEndpoint, endpoint_kw_args={}): - self.reactor = reactor - self.host = host - self.port = port - self.blacklist = blacklist - self.whitelist = whitelist - self.endpoint = endpoint - self.endpoint_kw_args = endpoint_kw_args - - @defer.inlineCallbacks - def connect(self, protocolFactory): - address = yield self.reactor.resolve(self.host) - - from netaddr import IPAddress - ip_address = IPAddress(address) - - if ip_address in self.blacklist: - if self.whitelist is None or ip_address not in self.whitelist: - raise ConnectError( - "Refusing to spider blacklisted IP address %s" % address - ) - - logger.info("Connecting to %s:%s", address, self.port) - endpoint = self.endpoint( - self.reactor, address, self.port, **self.endpoint_kw_args - ) - connection = yield endpoint.connect(protocolFactory) - defer.returnValue(connection) - - class SRVClientEndpoint(object): """An endpoint which looks up SRV records for a service. Cycles through the list of servers starting with each call to connect diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index d0ecf241b6..ba3ab1d37d 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -35,7 +35,7 @@ from twisted.web.resource import Resource from twisted.web.server import NOT_DONE_YET from synapse.api.errors import Codes, SynapseError -from synapse.http.client import SpiderHttpClient +from synapse.http.client import SimpleHttpClient from synapse.http.server import ( respond_with_json, respond_with_json_bytes, @@ -69,7 +69,12 @@ class PreviewUrlResource(Resource): self.max_spider_size = hs.config.max_spider_size self.server_name = hs.hostname self.store = hs.get_datastore() - self.client = SpiderHttpClient(hs) + self.client = SimpleHttpClient( + hs, + treq_args={"browser_like_redirects": True}, + ip_whitelist=hs.config.url_preview_ip_range_whitelist, + ip_blacklist=hs.config.url_preview_ip_range_blacklist, + ) self.media_repo = media_repo self.primary_base_path = media_repo.primary_base_path self.media_storage = media_storage @@ -318,6 +323,11 @@ class PreviewUrlResource(Resource): length, headers, uri, code = yield self.client.get_file( url, output_stream=f, max_size=self.max_spider_size, ) + except SynapseError: + # Pass SynapseErrors through directly, so that the servlet + # handler will return a SynapseError to the client instead of + # blank data or a 500. 
+ raise except Exception as e: # FIXME: pass through 404s and other error messages nicely logger.warn("Error downloading %s: %r", url, e) diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index c62f71b44a..650ce95a6f 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -15,21 +15,55 @@ import os -from mock import Mock +import attr +from netaddr import IPSet -from twisted.internet.defer import Deferred +from twisted.internet._resolver import HostResolution +from twisted.internet.address import IPv4Address, IPv6Address +from twisted.internet.error import DNSLookupError +from twisted.python.failure import Failure +from twisted.test.proto_helpers import AccumulatingProtocol +from twisted.web._newclient import ResponseDone from synapse.config.repository import MediaStorageProviderConfig -from synapse.util.logcontext import make_deferred_yieldable from synapse.util.module_loader import load_module from tests import unittest +from tests.server import FakeTransport + + +@attr.s +class FakeResponse(object): + version = attr.ib() + code = attr.ib() + phrase = attr.ib() + headers = attr.ib() + body = attr.ib() + absoluteURI = attr.ib() + + @property + def request(self): + @attr.s + class FakeTransport(object): + absoluteURI = self.absoluteURI + + return FakeTransport() + + def deliverBody(self, protocol): + protocol.dataReceived(self.body) + protocol.connectionLost(Failure(ResponseDone())) class URLPreviewTests(unittest.HomeserverTestCase): hijack_auth = True user_id = "@test:user" + end_content = ( + b'' + b'' + b'' + b'' + ) def make_homeserver(self, reactor, clock): @@ -39,6 +73,15 @@ class URLPreviewTests(unittest.HomeserverTestCase): config = self.default_config() config.url_preview_enabled = True config.max_spider_size = 9999999 + config.url_preview_ip_range_blacklist = IPSet( + ( + "192.168.1.1", + "1.0.0.0/8", + "3fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", + "2001:800::/21", + ) + ) + config.url_preview_ip_range_whitelist = IPSet(("1.1.1.1",)) config.url_preview_url_blacklist = [] config.media_store_path = self.storage_path @@ -62,63 +105,50 @@ class URLPreviewTests(unittest.HomeserverTestCase): def prepare(self, reactor, clock, hs): - self.fetches = [] + self.media_repo = hs.get_media_repository_resource() + self.preview_url = self.media_repo.children[b'preview_url'] - def get_file(url, output_stream, max_size): - """ - Returns tuple[int,dict,str,int] of file length, response headers, - absolute URI, and response code. 
- """ + self.lookups = {} - def write_to(r): - data, response = r - output_stream.write(data) - return response + class Resolver(object): + def resolveHostName( + _self, + resolutionReceiver, + hostName, + portNumber=0, + addressTypes=None, + transportSemantics='TCP', + ): - d = Deferred() - d.addCallback(write_to) - self.fetches.append((d, url)) - return make_deferred_yieldable(d) + resolution = HostResolution(hostName) + resolutionReceiver.resolutionBegan(resolution) + if hostName not in self.lookups: + raise DNSLookupError("OH NO") - client = Mock() - client.get_file = get_file + for i in self.lookups[hostName]: + resolutionReceiver.addressResolved(i[0]('TCP', i[1], portNumber)) + resolutionReceiver.resolutionComplete() + return resolutionReceiver - self.media_repo = hs.get_media_repository_resource() - preview_url = self.media_repo.children[b'preview_url'] - preview_url.client = client - self.preview_url = preview_url + self.reactor.nameResolver = Resolver() def test_cache_returns_correct_type(self): + self.lookups["matrix.org"] = [(IPv4Address, "8.8.8.8")] request, channel = self.make_request( - "GET", "url_preview?url=matrix.org", shorthand=False + "GET", "url_preview?url=http://matrix.org", shorthand=False ) request.render(self.preview_url) self.pump() - # We've made one fetch - self.assertEqual(len(self.fetches), 1) - - end_content = ( - b'' - b'' - b'' - b'' - ) - - self.fetches[0][0].callback( - ( - end_content, - ( - len(end_content), - { - b"Content-Length": [b"%d" % (len(end_content))], - b"Content-Type": [b'text/html; charset="utf8"'], - }, - "https://example.com", - 200, - ), - ) + client = self.reactor.tcpClients[0][2].buildProtocol(None) + server = AccumulatingProtocol() + server.makeConnection(FakeTransport(client, self.reactor)) + client.makeConnection(FakeTransport(server, self.reactor)) + client.dataReceived( + b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n" + % (len(self.end_content),) + + self.end_content ) self.pump() @@ -129,14 +159,11 @@ class URLPreviewTests(unittest.HomeserverTestCase): # Check the cache returns the correct response request, channel = self.make_request( - "GET", "url_preview?url=matrix.org", shorthand=False + "GET", "url_preview?url=http://matrix.org", shorthand=False ) request.render(self.preview_url) self.pump() - # Only one fetch, still, since we'll lean on the cache - self.assertEqual(len(self.fetches), 1) - # Check the cache response has the same content self.assertEqual(channel.code, 200) self.assertEqual( @@ -144,20 +171,17 @@ class URLPreviewTests(unittest.HomeserverTestCase): ) # Clear the in-memory cache - self.assertIn("matrix.org", self.preview_url._cache) - self.preview_url._cache.pop("matrix.org") - self.assertNotIn("matrix.org", self.preview_url._cache) + self.assertIn("http://matrix.org", self.preview_url._cache) + self.preview_url._cache.pop("http://matrix.org") + self.assertNotIn("http://matrix.org", self.preview_url._cache) # Check the database cache returns the correct response request, channel = self.make_request( - "GET", "url_preview?url=matrix.org", shorthand=False + "GET", "url_preview?url=http://matrix.org", shorthand=False ) request.render(self.preview_url) self.pump() - # Only one fetch, still, since we'll lean on the cache - self.assertEqual(len(self.fetches), 1) - # Check the cache response has the same content self.assertEqual(channel.code, 200) self.assertEqual( @@ -165,78 +189,282 @@ class URLPreviewTests(unittest.HomeserverTestCase): ) def test_non_ascii_preview_httpequiv(self): + 
self.lookups["matrix.org"] = [(IPv4Address, "8.8.8.8")] + + end_content = ( + b'' + b'' + b'' + b'' + b'' + ) request, channel = self.make_request( - "GET", "url_preview?url=matrix.org", shorthand=False + "GET", "url_preview?url=http://matrix.org", shorthand=False ) request.render(self.preview_url) self.pump() - # We've made one fetch - self.assertEqual(len(self.fetches), 1) + client = self.reactor.tcpClients[0][2].buildProtocol(None) + server = AccumulatingProtocol() + server.makeConnection(FakeTransport(client, self.reactor)) + client.makeConnection(FakeTransport(server, self.reactor)) + client.dataReceived( + ( + b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n" + b"Content-Type: text/html; charset=\"utf8\"\r\n\r\n" + ) + % (len(end_content),) + + end_content + ) + + self.pump() + self.assertEqual(channel.code, 200) + self.assertEqual(channel.json_body["og:title"], u"\u0434\u043a\u0430") + + def test_non_ascii_preview_content_type(self): + self.lookups["matrix.org"] = [(IPv4Address, "8.8.8.8")] end_content = ( b'' - b'' b'' b'' b'' ) - self.fetches[0][0].callback( + request, channel = self.make_request( + "GET", "url_preview?url=http://matrix.org", shorthand=False + ) + request.render(self.preview_url) + self.pump() + + client = self.reactor.tcpClients[0][2].buildProtocol(None) + server = AccumulatingProtocol() + server.makeConnection(FakeTransport(client, self.reactor)) + client.makeConnection(FakeTransport(server, self.reactor)) + client.dataReceived( ( - end_content, - ( - len(end_content), - { - b"Content-Length": [b"%d" % (len(end_content))], - # This charset=utf-8 should be ignored, because the - # document has a meta tag overriding it. - b"Content-Type": [b'text/html; charset="utf8"'], - }, - "https://example.com", - 200, - ), + b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n" + b"Content-Type: text/html; charset=\"windows-1251\"\r\n\r\n" ) + % (len(end_content),) + + end_content ) self.pump() self.assertEqual(channel.code, 200) self.assertEqual(channel.json_body["og:title"], u"\u0434\u043a\u0430") - def test_non_ascii_preview_content_type(self): + def test_ipaddr(self): + """ + IP addresses can be previewed directly. + """ + self.lookups["example.com"] = [(IPv4Address, "8.8.8.8")] request, channel = self.make_request( - "GET", "url_preview?url=matrix.org", shorthand=False + "GET", "url_preview?url=http://example.com", shorthand=False ) request.render(self.preview_url) self.pump() - # We've made one fetch - self.assertEqual(len(self.fetches), 1) + client = self.reactor.tcpClients[0][2].buildProtocol(None) + server = AccumulatingProtocol() + server.makeConnection(FakeTransport(client, self.reactor)) + client.makeConnection(FakeTransport(server, self.reactor)) + client.dataReceived( + b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n" + % (len(self.end_content),) + + self.end_content + ) - end_content = ( - b'' - b'' - b'' - b'' + self.pump() + self.assertEqual(channel.code, 200) + self.assertEqual( + channel.json_body, {"og:title": "~matrix~", "og:description": "hi"} ) - self.fetches[0][0].callback( - ( - end_content, - ( - len(end_content), - { - b"Content-Length": [b"%d" % (len(end_content))], - b"Content-Type": [b'text/html; charset="windows-1251"'], - }, - "https://example.com", - 200, - ), - ) + def test_blacklisted_ip_specific(self): + """ + Blacklisted IP addresses, found via DNS, are not spidered. 
+ """ + self.lookups["example.com"] = [(IPv4Address, "192.168.1.1")] + + request, channel = self.make_request( + "GET", "url_preview?url=http://example.com", shorthand=False + ) + request.render(self.preview_url) + self.pump() + + # No requests made. + self.assertEqual(len(self.reactor.tcpClients), 0) + self.assertEqual(channel.code, 403) + self.assertEqual( + channel.json_body, + { + 'errcode': 'M_UNKNOWN', + 'error': 'IP address blocked by IP blacklist entry', + }, + ) + + def test_blacklisted_ip_range(self): + """ + Blacklisted IP ranges, IPs found over DNS, are not spidered. + """ + self.lookups["example.com"] = [(IPv4Address, "1.1.1.2")] + + request, channel = self.make_request( + "GET", "url_preview?url=http://example.com", shorthand=False + ) + request.render(self.preview_url) + self.pump() + + self.assertEqual(channel.code, 403) + self.assertEqual( + channel.json_body, + { + 'errcode': 'M_UNKNOWN', + 'error': 'IP address blocked by IP blacklist entry', + }, + ) + + def test_blacklisted_ip_specific_direct(self): + """ + Blacklisted IP addresses, accessed directly, are not spidered. + """ + request, channel = self.make_request( + "GET", "url_preview?url=http://192.168.1.1", shorthand=False + ) + request.render(self.preview_url) + self.pump() + + # No requests made. + self.assertEqual(len(self.reactor.tcpClients), 0) + self.assertEqual(channel.code, 403) + self.assertEqual( + channel.json_body, + { + 'errcode': 'M_UNKNOWN', + 'error': 'IP address blocked by IP blacklist entry', + }, + ) + + def test_blacklisted_ip_range_direct(self): + """ + Blacklisted IP ranges, accessed directly, are not spidered. + """ + request, channel = self.make_request( + "GET", "url_preview?url=http://1.1.1.2", shorthand=False + ) + request.render(self.preview_url) + self.pump() + + self.assertEqual(channel.code, 403) + self.assertEqual( + channel.json_body, + { + 'errcode': 'M_UNKNOWN', + 'error': 'IP address blocked by IP blacklist entry', + }, + ) + + def test_blacklisted_ip_range_whitelisted_ip(self): + """ + Blacklisted but then subsequently whitelisted IP addresses can be + spidered. + """ + self.lookups["example.com"] = [(IPv4Address, "1.1.1.1")] + + request, channel = self.make_request( + "GET", "url_preview?url=http://example.com", shorthand=False + ) + request.render(self.preview_url) + self.pump() + + client = self.reactor.tcpClients[0][2].buildProtocol(None) + + server = AccumulatingProtocol() + server.makeConnection(FakeTransport(client, self.reactor)) + client.makeConnection(FakeTransport(server, self.reactor)) + + client.dataReceived( + b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n" + % (len(self.end_content),) + + self.end_content ) self.pump() self.assertEqual(channel.code, 200) - self.assertEqual(channel.json_body["og:title"], u"\u0434\u043a\u0430") + self.assertEqual( + channel.json_body, {"og:title": "~matrix~", "og:description": "hi"} + ) + + def test_blacklisted_ip_with_external_ip(self): + """ + If a hostname resolves a blacklisted IP, even if there's a + non-blacklisted one, it will be rejected. + """ + # Hardcode the URL resolving to the IP we want. 
+ self.lookups[u"example.com"] = [ + (IPv4Address, "1.1.1.2"), + (IPv4Address, "8.8.8.8"), + ] + + request, channel = self.make_request( + "GET", "url_preview?url=http://example.com", shorthand=False + ) + request.render(self.preview_url) + self.pump() + self.assertEqual(channel.code, 403) + self.assertEqual( + channel.json_body, + { + 'errcode': 'M_UNKNOWN', + 'error': 'IP address blocked by IP blacklist entry', + }, + ) + + def test_blacklisted_ipv6_specific(self): + """ + Blacklisted IP addresses, found via DNS, are not spidered. + """ + self.lookups["example.com"] = [ + (IPv6Address, "3fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff") + ] + + request, channel = self.make_request( + "GET", "url_preview?url=http://example.com", shorthand=False + ) + request.render(self.preview_url) + self.pump() + + # No requests made. + self.assertEqual(len(self.reactor.tcpClients), 0) + self.assertEqual(channel.code, 403) + self.assertEqual( + channel.json_body, + { + 'errcode': 'M_UNKNOWN', + 'error': 'IP address blocked by IP blacklist entry', + }, + ) + + def test_blacklisted_ipv6_range(self): + """ + Blacklisted IP ranges, IPs found over DNS, are not spidered. + """ + self.lookups["example.com"] = [(IPv6Address, "2001:800::1")] + + request, channel = self.make_request( + "GET", "url_preview?url=http://example.com", shorthand=False + ) + request.render(self.preview_url) + self.pump() + + self.assertEqual(channel.code, 403) + self.assertEqual( + channel.json_body, + { + 'errcode': 'M_UNKNOWN', + 'error': 'IP address blocked by IP blacklist entry', + }, + ) diff --git a/tests/server.py b/tests/server.py index ceec2f2d4e..db43fa0db8 100644 --- a/tests/server.py +++ b/tests/server.py @@ -383,8 +383,16 @@ class FakeTransport(object): self.disconnecting = True def pauseProducing(self): + if not self.producer: + return + self.producer.pauseProducing() + def resumeProducing(self): + if not self.producer: + return + self.producer.resumeProducing() + def unregisterProducer(self): if not self.producer: return -- cgit 1.5.1 From d02c4532c068efc54c6abce77e9ec55018b2d444 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 22 Jan 2019 20:28:48 +0000 Subject: Add a test for MatrixFederationAgent --- .../federation/test_matrix_federation_agent.py | 183 +++++++++++++++++++++ tests/server.py | 13 +- 2 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 tests/http/federation/test_matrix_federation_agent.py (limited to 'tests/server.py') diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py new file mode 100644 index 0000000000..eb963d80fb --- /dev/null +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -0,0 +1,183 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import logging + +from mock import Mock + +import treq + +from twisted.internet import defer +from twisted.internet.protocol import Factory +from twisted.protocols.tls import TLSMemoryBIOFactory +from twisted.test.ssl_helpers import ServerTLSContext +from twisted.web.http import HTTPChannel + +from synapse.crypto.context_factory import ClientTLSOptionsFactory +from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent +from synapse.util.logcontext import LoggingContext + +from tests.server import FakeTransport, ThreadedMemoryReactorClock +from tests.unittest import TestCase + +logger = logging.getLogger(__name__) + + +class MatrixFederationAgentTests(TestCase): + def setUp(self): + self.reactor = ThreadedMemoryReactorClock() + + self.mock_resolver = Mock() + + self.agent = MatrixFederationAgent( + reactor=self.reactor, + tls_client_options_factory=ClientTLSOptionsFactory(None), + _srv_resolver=self.mock_resolver, + ) + + def _make_connection(self, client_factory): + """Builds a test server, and completes the outgoing client connection + + Returns: + HTTPChannel: the test server + """ + + # build the test server + server_tls_protocol = _build_test_server() + + # now, tell the client protocol factory to build the client protocol (it will be a + # _WrappingProtocol, around a TLSMemoryBIOProtocol, around an + # HTTP11ClientProtocol) and wire the output of said protocol up to the server via + # a FakeTransport. + # + # Normally this would be done by the TCP socket code in Twisted, but we are + # stubbing that out here. + client_protocol = client_factory.buildProtocol(None) + client_protocol.makeConnection(FakeTransport(server_tls_protocol, self.reactor)) + + # tell the server tls protocol to send its stuff back to the client, too + server_tls_protocol.makeConnection(FakeTransport(client_protocol, self.reactor)) + + # finally, give the reactor a pump to get the TLS juices flowing. + self.reactor.pump((0.1,)) + + # fish the test server back out of the server-side TLS protocol. 
+ return server_tls_protocol.wrappedProtocol + + @defer.inlineCallbacks + def _make_get_request(self, uri): + """ + Sends a simple GET request via the agent, and checks its logcontext management + """ + with LoggingContext("one") as context: + fetch_d = self.agent.request(b'GET', uri) + + # Nothing happened yet + self.assertNoResult(fetch_d) + + # should have reset logcontext to the sentinel + _check_logcontext(LoggingContext.sentinel) + + try: + fetch_res = yield fetch_d + defer.returnValue(fetch_res) + finally: + _check_logcontext(context) + + def test_get(self): + """ + happy-path test of a GET request + """ + self.reactor.lookups["testserv"] = "1.2.3.4" + test_d = self._make_get_request(b"matrix://testserv:8448/foo/bar") + + # Nothing happened yet + self.assertNoResult(test_d) + + # Make sure treq is trying to connect + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, '1.2.3.4') + self.assertEqual(port, 8448) + + # make a test server, and wire up the client + http_server = self._make_connection(client_factory) + + self.assertEqual(len(http_server.requests), 1) + request = http_server.requests[0] + self.assertEqual(request.method, b'GET') + self.assertEqual(request.path, b'/foo/bar') + self.assertEqual( + request.requestHeaders.getRawHeaders(b'host'), + [b'testserv:8448'] + ) + content = request.content.read() + self.assertEqual(content, b'') + + # Deferred is still without a result + self.assertNoResult(test_d) + + # send the headers + request.responseHeaders.setRawHeaders(b'Content-Type', [b'application/json']) + request.write('') + + self.reactor.pump((0.1,)) + + response = self.successResultOf(test_d) + + # that should give us a Response object + self.assertEqual(response.code, 200) + + # Send the body + request.write('{ "a": 1 }'.encode('ascii')) + request.finish() + + self.reactor.pump((0.1,)) + + # check it can be read + json = self.successResultOf(treq.json_content(response)) + self.assertEqual(json, {"a": 1}) + + +def _check_logcontext(context): + current = LoggingContext.current_context() + if current is not context: + raise AssertionError( + "Expected logcontext %s but was %s" % (context, current), + ) + + +def _build_test_server(): + """Construct a test server + + This builds an HTTP channel, wrapped with a TLSMemoryBIOProtocol + + Returns: + TLSMemoryBIOProtocol + """ + server_factory = Factory.forProtocol(HTTPChannel) + # Request.finish expects the factory to have a 'log' method. + server_factory.log = _log_request + + server_tls_factory = TLSMemoryBIOFactory( + ServerTLSContext(), isClient=False, wrappedFactory=server_factory, + ) + + return server_tls_factory.buildProtocol(None) + + +def _log_request(request): + """Implements Factory.log, which is expected by Request.finish""" + logger.info("Completed request %s", request) diff --git a/tests/server.py b/tests/server.py index db43fa0db8..3a4aa55645 100644 --- a/tests/server.py +++ b/tests/server.py @@ -1,4 +1,5 @@ import json +import logging from io import BytesIO from six import text_type @@ -22,6 +23,8 @@ from synapse.util import Clock from tests.utils import setup_test_homeserver as _sth +logger = logging.getLogger(__name__) + class TimedOutException(Exception): """ @@ -414,6 +417,11 @@ class FakeTransport(object): self.buffer = self.buffer + byt def _write(): + if not self.buffer: + # nothing to do. 
Don't write empty buffers: it upsets the + # TLSMemoryBIOProtocol + return + if getattr(self.other, "transport") is not None: self.other.dataReceived(self.buffer) self.buffer = b"" @@ -421,7 +429,10 @@ class FakeTransport(object): self._reactor.callLater(0.0, _write) - _write() + # always actually do the write asynchronously. Some protocols (notably the + # TLSMemoryBIOProtocol) get very confused if a read comes back while they are + # still doing a write. Doing a callLater here breaks the cycle. + self._reactor.callLater(0.0, _write) def writeSequence(self, seq): for x in seq: -- cgit 1.5.1 From 6b574f3df7ec9ff2b92e85b34f055b50cd318930 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 23 Jan 2019 11:25:36 +0000 Subject: fix python2 test failure --- tests/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tests/server.py') diff --git a/tests/server.py b/tests/server.py index 3a4aa55645..ed2a046ae6 100644 --- a/tests/server.py +++ b/tests/server.py @@ -342,7 +342,7 @@ def get_clock(): return (clock, hs_clock) -@attr.s +@attr.s(cmp=False) class FakeTransport(object): """ A twisted.internet.interfaces.ITransport implementation which sends all its data -- cgit 1.5.1 From f2b553d65692ad6dffd54fc5e911ae76b3f27aeb Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 29 Jan 2019 09:38:29 +0000 Subject: Use SimpleResolverComplexifier in tests (#4497) two reasons for this. One, it saves a bunch of boilerplate. Two, it squashes unicode to IDNA-in-a-`str` (even on python 3) in a way that it turns out we rely on to give consistent behaviour between python 2 and 3. --- changelog.d/4497.feature | 1 + .../federation/test_matrix_federation_agent.py | 7 ++-- tests/server.py | 41 +++++++--------------- 3 files changed, 17 insertions(+), 32 deletions(-) create mode 100644 changelog.d/4497.feature (limited to 'tests/server.py') diff --git a/changelog.d/4497.feature b/changelog.d/4497.feature new file mode 100644 index 0000000000..bda713adf9 --- /dev/null +++ b/changelog.d/4497.feature @@ -0,0 +1 @@ +Implement MSC1708 (.well-known routing for server-server federation) \ No newline at end of file diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index 8257594fb8..53b52ace59 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -377,9 +377,8 @@ class MatrixFederationAgentTests(TestCase): self.mock_resolver.resolve_service.side_effect = lambda _: [] - # hostnameendpoint does the lookup on the unicode value (getaddrinfo encodes - # it back to idna) - self.reactor.lookups[u"bücher.com"] = "1.2.3.4" + # the resolver is always called with the IDNA hostname as a native string. 
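A small aside on the punycoded hostnames used in these tests: the xn-- forms follow directly from Python's built-in idna codec, so the comments above can be checked with two lines of plain Python.

assert u"bücher.com".encode("idna") == b"xn--bcher-kva.com"
assert u"târget.com".encode("idna") == b"xn--trget-3qa.com"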
+ self.reactor.lookups["xn--bcher-kva.com"] = "1.2.3.4" # this is idna for bücher.com test_d = self._make_get_request(b"matrix://xn--bcher-kva.com/foo/bar") @@ -424,7 +423,7 @@ class MatrixFederationAgentTests(TestCase): self.mock_resolver.resolve_service.side_effect = lambda _: [ Server(host=b"xn--trget-3qa.com", port=8443) # târget.com ] - self.reactor.lookups[u"târget.com"] = "1.2.3.4" + self.reactor.lookups["xn--trget-3qa.com"] = "1.2.3.4" test_d = self._make_get_request(b"matrix://xn--bcher-kva.com/foo/bar") diff --git a/tests/server.py b/tests/server.py index ed2a046ae6..6adcc73f91 100644 --- a/tests/server.py +++ b/tests/server.py @@ -8,11 +8,10 @@ import attr from zope.interface import implementer from twisted.internet import address, threads, udp -from twisted.internet._resolver import HostResolution -from twisted.internet.address import IPv4Address -from twisted.internet.defer import Deferred +from twisted.internet._resolver import SimpleResolverComplexifier +from twisted.internet.defer import Deferred, fail, succeed from twisted.internet.error import DNSLookupError -from twisted.internet.interfaces import IReactorPluggableNameResolver +from twisted.internet.interfaces import IReactorPluggableNameResolver, IResolverSimple from twisted.python.failure import Failure from twisted.test.proto_helpers import MemoryReactorClock from twisted.web.http import unquote @@ -227,30 +226,16 @@ class ThreadedMemoryReactorClock(MemoryReactorClock): def __init__(self): self._udp = [] - self.lookups = {} - - class Resolver(object): - def resolveHostName( - _self, - resolutionReceiver, - hostName, - portNumber=0, - addressTypes=None, - transportSemantics='TCP', - ): - - resolution = HostResolution(hostName) - resolutionReceiver.resolutionBegan(resolution) - if hostName not in self.lookups: - raise DNSLookupError("OH NO") - - resolutionReceiver.addressResolved( - IPv4Address('TCP', self.lookups[hostName], portNumber) - ) - resolutionReceiver.resolutionComplete() - return resolution - - self.nameResolver = Resolver() + lookups = self.lookups = {} + + @implementer(IResolverSimple) + class FakeResolver(object): + def getHostByName(self, name, timeout=None): + if name not in lookups: + return fail(DNSLookupError("OH NO: unknown %s" % (name, ))) + return succeed(lookups[name]) + + self.nameResolver = SimpleResolverComplexifier(FakeResolver()) super(ThreadedMemoryReactorClock, self).__init__() def listenUDP(self, port, protocol, interface='', maxPacketSize=8196): -- cgit 1.5.1 From 99e36d5e243059a4c92a769a2920714b2d4c84d9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 29 Jan 2019 13:53:02 +0000 Subject: Implement MSC1708 (.well-known lookups for server routing) (#4489) --- MANIFEST.in | 1 + changelog.d/4408.feature | 1 + changelog.d/4408.misc | 1 - changelog.d/4409.feature | 1 + changelog.d/4409.misc | 1 - changelog.d/4426.feature | 1 + changelog.d/4426.misc | 1 - changelog.d/4427.feature | 1 + changelog.d/4427.misc | 1 - changelog.d/4428.feature | 1 + changelog.d/4428.misc | 1 - changelog.d/4464.feature | 1 + changelog.d/4464.misc | 1 - changelog.d/4468.feature | 1 + changelog.d/4468.misc | 1 - changelog.d/4487.feature | 1 + changelog.d/4487.misc | 1 - changelog.d/4489.feature | 1 + synapse/http/federation/matrix_federation_agent.py | 114 ++++++++++- tests/http/__init__.py | 42 ++++ .../federation/test_matrix_federation_agent.py | 223 ++++++++++++++++++++- tests/http/server.pem | 81 ++++++++ tests/server.py | 13 +- 23 files changed, 470 
insertions(+), 21 deletions(-) create mode 100644 changelog.d/4408.feature delete mode 100644 changelog.d/4408.misc create mode 100644 changelog.d/4409.feature delete mode 100644 changelog.d/4409.misc create mode 100644 changelog.d/4426.feature delete mode 100644 changelog.d/4426.misc create mode 100644 changelog.d/4427.feature delete mode 100644 changelog.d/4427.misc create mode 100644 changelog.d/4428.feature delete mode 100644 changelog.d/4428.misc create mode 100644 changelog.d/4464.feature delete mode 100644 changelog.d/4464.misc create mode 100644 changelog.d/4468.feature delete mode 100644 changelog.d/4468.misc create mode 100644 changelog.d/4487.feature delete mode 100644 changelog.d/4487.misc create mode 100644 changelog.d/4489.feature create mode 100644 tests/http/server.pem (limited to 'tests/server.py') diff --git a/MANIFEST.in b/MANIFEST.in index 7e4c031b79..eb2de60f72 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -15,6 +15,7 @@ recursive-include docs * recursive-include scripts * recursive-include scripts-dev * recursive-include synapse *.pyi +recursive-include tests *.pem recursive-include tests *.py recursive-include synapse/res * diff --git a/changelog.d/4408.feature b/changelog.d/4408.feature new file mode 100644 index 0000000000..bda713adf9 --- /dev/null +++ b/changelog.d/4408.feature @@ -0,0 +1 @@ +Implement MSC1708 (.well-known routing for server-server federation) \ No newline at end of file diff --git a/changelog.d/4408.misc b/changelog.d/4408.misc deleted file mode 100644 index 729bafd62e..0000000000 --- a/changelog.d/4408.misc +++ /dev/null @@ -1 +0,0 @@ -Refactor 'sign_request' as 'build_auth_headers' \ No newline at end of file diff --git a/changelog.d/4409.feature b/changelog.d/4409.feature new file mode 100644 index 0000000000..bda713adf9 --- /dev/null +++ b/changelog.d/4409.feature @@ -0,0 +1 @@ +Implement MSC1708 (.well-known routing for server-server federation) \ No newline at end of file diff --git a/changelog.d/4409.misc b/changelog.d/4409.misc deleted file mode 100644 index 9cf2adfbb1..0000000000 --- a/changelog.d/4409.misc +++ /dev/null @@ -1 +0,0 @@ -Remove redundant federation connection wrapping code diff --git a/changelog.d/4426.feature b/changelog.d/4426.feature new file mode 100644 index 0000000000..bda713adf9 --- /dev/null +++ b/changelog.d/4426.feature @@ -0,0 +1 @@ +Implement MSC1708 (.well-known routing for server-server federation) \ No newline at end of file diff --git a/changelog.d/4426.misc b/changelog.d/4426.misc deleted file mode 100644 index cda50438e0..0000000000 --- a/changelog.d/4426.misc +++ /dev/null @@ -1 +0,0 @@ -Remove redundant SynapseKeyClientProtocol magic \ No newline at end of file diff --git a/changelog.d/4427.feature b/changelog.d/4427.feature new file mode 100644 index 0000000000..bda713adf9 --- /dev/null +++ b/changelog.d/4427.feature @@ -0,0 +1 @@ +Implement MSC1708 (.well-known routing for server-server federation) \ No newline at end of file diff --git a/changelog.d/4427.misc b/changelog.d/4427.misc deleted file mode 100644 index 75500bdbc2..0000000000 --- a/changelog.d/4427.misc +++ /dev/null @@ -1 +0,0 @@ -Refactor and cleanup for SRV record lookup diff --git a/changelog.d/4428.feature b/changelog.d/4428.feature new file mode 100644 index 0000000000..bda713adf9 --- /dev/null +++ b/changelog.d/4428.feature @@ -0,0 +1 @@ +Implement MSC1708 (.well-known routing for server-server federation) \ No newline at end of file diff --git a/changelog.d/4428.misc b/changelog.d/4428.misc deleted file mode 100644 index 
9a51434755..0000000000 --- a/changelog.d/4428.misc +++ /dev/null @@ -1 +0,0 @@ -Move SRV logic into the Agent layer diff --git a/changelog.d/4464.feature b/changelog.d/4464.feature new file mode 100644 index 0000000000..bda713adf9 --- /dev/null +++ b/changelog.d/4464.feature @@ -0,0 +1 @@ +Implement MSC1708 (.well-known routing for server-server federation) \ No newline at end of file diff --git a/changelog.d/4464.misc b/changelog.d/4464.misc deleted file mode 100644 index 9a51434755..0000000000 --- a/changelog.d/4464.misc +++ /dev/null @@ -1 +0,0 @@ -Move SRV logic into the Agent layer diff --git a/changelog.d/4468.feature b/changelog.d/4468.feature new file mode 100644 index 0000000000..bda713adf9 --- /dev/null +++ b/changelog.d/4468.feature @@ -0,0 +1 @@ +Implement MSC1708 (.well-known routing for server-server federation) \ No newline at end of file diff --git a/changelog.d/4468.misc b/changelog.d/4468.misc deleted file mode 100644 index 9a51434755..0000000000 --- a/changelog.d/4468.misc +++ /dev/null @@ -1 +0,0 @@ -Move SRV logic into the Agent layer diff --git a/changelog.d/4487.feature b/changelog.d/4487.feature new file mode 100644 index 0000000000..bda713adf9 --- /dev/null +++ b/changelog.d/4487.feature @@ -0,0 +1 @@ +Implement MSC1708 (.well-known routing for server-server federation) \ No newline at end of file diff --git a/changelog.d/4487.misc b/changelog.d/4487.misc deleted file mode 100644 index 79de8eb3ad..0000000000 --- a/changelog.d/4487.misc +++ /dev/null @@ -1 +0,0 @@ -Fix idna and ipv6 literal handling in MatrixFederationAgent diff --git a/changelog.d/4489.feature b/changelog.d/4489.feature new file mode 100644 index 0000000000..bda713adf9 --- /dev/null +++ b/changelog.d/4489.feature @@ -0,0 +1 @@ +Implement MSC1708 (.well-known routing for server-server federation) \ No newline at end of file diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 4a6f634c8b..07c72c9351 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import cgi +import json import logging import attr @@ -20,7 +22,7 @@ from zope.interface import implementer from twisted.internet import defer from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS -from twisted.web.client import URI, Agent, HTTPConnectionPool +from twisted.web.client import URI, Agent, HTTPConnectionPool, readBody from twisted.web.http_headers import Headers from twisted.web.iweb import IAgent @@ -43,13 +45,19 @@ class MatrixFederationAgent(object): tls_client_options_factory (ClientTLSOptionsFactory|None): factory to use for fetching client tls options, or none to disable TLS. + _well_known_tls_policy (IPolicyForHTTPS|None): + TLS policy to use for fetching .well-known files. None to use a default + (browser-like) implementation. + srv_resolver (SrvResolver|None): SRVResolver impl to use for looking up SRV records. None to use a default implementation. 
""" def __init__( - self, reactor, tls_client_options_factory, _srv_resolver=None, + self, reactor, tls_client_options_factory, + _well_known_tls_policy=None, + _srv_resolver=None, ): self._reactor = reactor self._tls_client_options_factory = tls_client_options_factory @@ -62,6 +70,14 @@ class MatrixFederationAgent(object): self._pool.maxPersistentPerHost = 5 self._pool.cachedConnectionTimeout = 2 * 60 + agent_args = {} + if _well_known_tls_policy is not None: + # the param is called 'contextFactory', but actually passing a + # contextfactory is deprecated, and it expects an IPolicyForHTTPS. + agent_args['contextFactory'] = _well_known_tls_policy + _well_known_agent = Agent(self._reactor, pool=self._pool, **agent_args) + self._well_known_agent = _well_known_agent + @defer.inlineCallbacks def request(self, method, uri, headers=None, bodyProducer=None): """ @@ -114,7 +130,11 @@ class MatrixFederationAgent(object): class EndpointFactory(object): @staticmethod def endpointForURI(_uri): - logger.info("Connecting to %s:%s", res.target_host, res.target_port) + logger.info( + "Connecting to %s:%i", + res.target_host.decode("ascii"), + res.target_port, + ) ep = HostnameEndpoint(self._reactor, res.target_host, res.target_port) if tls_options is not None: ep = wrapClientTLS(tls_options, ep) @@ -127,7 +147,7 @@ class MatrixFederationAgent(object): defer.returnValue(res) @defer.inlineCallbacks - def _route_matrix_uri(self, parsed_uri): + def _route_matrix_uri(self, parsed_uri, lookup_well_known=True): """Helper for `request`: determine the routing for a Matrix URI Args: @@ -135,6 +155,9 @@ class MatrixFederationAgent(object): parsed with URI.fromBytes(uri, defaultPort=-1) to set the `port` to -1 if there is no explicit port given. + lookup_well_known (bool): True if we should look up the .well-known file if + there is no SRV record. + Returns: Deferred[_RoutingResult] """ @@ -169,6 +192,42 @@ class MatrixFederationAgent(object): service_name = b"_matrix._tcp.%s" % (parsed_uri.host,) server_list = yield self._srv_resolver.resolve_service(service_name) + if not server_list and lookup_well_known: + # try a .well-known lookup + well_known_server = yield self._get_well_known(parsed_uri.host) + + if well_known_server: + # if we found a .well-known, start again, but don't do another + # .well-known lookup. + + # parse the server name in the .well-known response into host/port. + # (This code is lifted from twisted.web.client.URI.fromBytes). 
+ if b':' in well_known_server: + well_known_host, well_known_port = well_known_server.rsplit(b':', 1) + try: + well_known_port = int(well_known_port) + except ValueError: + # the part after the colon could not be parsed as an int + # - we assume it is an IPv6 literal with no port (the closing + # ']' stops it being parsed as an int) + well_known_host, well_known_port = well_known_server, -1 + else: + well_known_host, well_known_port = well_known_server, -1 + + new_uri = URI( + scheme=parsed_uri.scheme, + netloc=well_known_server, + host=well_known_host, + port=well_known_port, + path=parsed_uri.path, + params=parsed_uri.params, + query=parsed_uri.query, + fragment=parsed_uri.fragment, + ) + + res = yield self._route_matrix_uri(new_uri, lookup_well_known=False) + defer.returnValue(res) + if not server_list: target_host = parsed_uri.host port = 8448 @@ -190,6 +249,53 @@ class MatrixFederationAgent(object): target_port=port, )) + @defer.inlineCallbacks + def _get_well_known(self, server_name): + """Attempt to fetch and parse a .well-known file for the given server + + Args: + server_name (bytes): name of the server, from the requested url + + Returns: + Deferred[bytes|None]: either the new server name, from the .well-known, or + None if there was no .well-known file. + """ + # FIXME: add a cache + + uri = b"https://%s/.well-known/matrix/server" % (server_name, ) + logger.info("Fetching %s", uri.decode("ascii")) + try: + response = yield make_deferred_yieldable( + self._well_known_agent.request(b"GET", uri), + ) + except Exception as e: + logger.info( + "Connection error fetching %s: %s", + uri.decode("ascii"), e, + ) + defer.returnValue(None) + + body = yield make_deferred_yieldable(readBody(response)) + + if response.code != 200: + logger.info( + "Error response %i from %s: %s", + response.code, uri.decode("ascii"), body, + ) + defer.returnValue(None) + + content_types = response.headers.getRawHeaders(u'content-type') + if content_types is None: + raise Exception("no content-type header on .well-known response") + content_type, _opts = cgi.parse_header(content_types[-1]) + if content_type != 'application/json': + raise Exception("content-type not application/json on .well-known response") + parsed_body = json.loads(body.decode('utf-8')) + logger.info("Response from .well-known: %s", parsed_body) + if not isinstance(parsed_body, dict) or "m.server" not in parsed_body: + raise Exception("invalid .well-known response") + defer.returnValue(parsed_body["m.server"].encode("ascii")) + @attr.s class _RoutingResult(object): diff --git a/tests/http/__init__.py b/tests/http/__init__.py index e69de29bb2..ee8010f598 100644 --- a/tests/http/__init__.py +++ b/tests/http/__init__.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os.path + +from OpenSSL import SSL + + +def get_test_cert_file(): + """get the path to the test cert""" + + # the cert file itself is made with: + # + # openssl req -x509 -newkey rsa:4096 -keyout server.pem -out server.pem -days 36500 \ + # -nodes -subj '/CN=testserv' + return os.path.join( + os.path.dirname(__file__), + 'server.pem', + ) + + +class ServerTLSContext(object): + """A TLS Context which presents our test cert.""" + def __init__(self): + self.filename = get_test_cert_file() + + def getContext(self): + ctx = SSL.Context(SSL.TLSv1_METHOD) + ctx.use_certificate_file(self.filename) + ctx.use_privatekey_file(self.filename) + return ctx diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index 53b52ace59..bd80dd0cb6 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -17,18 +17,21 @@ import logging from mock import Mock import treq +from zope.interface import implementer from twisted.internet import defer +from twisted.internet._sslverify import ClientTLSOptions, OpenSSLCertificateOptions from twisted.internet.protocol import Factory from twisted.protocols.tls import TLSMemoryBIOFactory -from twisted.test.ssl_helpers import ServerTLSContext from twisted.web.http import HTTPChannel +from twisted.web.iweb import IPolicyForHTTPS from synapse.crypto.context_factory import ClientTLSOptionsFactory from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent from synapse.http.federation.srv_resolver import Server from synapse.util.logcontext import LoggingContext +from tests.http import ServerTLSContext from tests.server import FakeTransport, ThreadedMemoryReactorClock from tests.unittest import TestCase @@ -44,6 +47,7 @@ class MatrixFederationAgentTests(TestCase): self.agent = MatrixFederationAgent( reactor=self.reactor, tls_client_options_factory=ClientTLSOptionsFactory(None), + _well_known_tls_policy=TrustingTLSPolicyForHTTPS(), _srv_resolver=self.mock_resolver, ) @@ -65,10 +69,14 @@ class MatrixFederationAgentTests(TestCase): # Normally this would be done by the TCP socket code in Twisted, but we are # stubbing that out here. client_protocol = client_factory.buildProtocol(None) - client_protocol.makeConnection(FakeTransport(server_tls_protocol, self.reactor)) + client_protocol.makeConnection( + FakeTransport(server_tls_protocol, self.reactor, client_protocol), + ) # tell the server tls protocol to send its stuff back to the client, too - server_tls_protocol.makeConnection(FakeTransport(client_protocol, self.reactor)) + server_tls_protocol.makeConnection( + FakeTransport(client_protocol, self.reactor, server_tls_protocol), + ) # give the reactor a pump to get the TLS juices flowing. self.reactor.pump((0.1,)) @@ -101,9 +109,49 @@ class MatrixFederationAgentTests(TestCase): try: fetch_res = yield fetch_d defer.returnValue(fetch_res) + except Exception as e: + logger.info("Fetch of %s failed: %s", uri.decode("ascii"), e) + raise finally: _check_logcontext(context) + def _handle_well_known_connection(self, client_factory, expected_sni, target_server): + """Handle an outgoing HTTPs connection: wire it up to a server, check that the + request is for a .well-known, and send the response. 
+ + Args: + client_factory (IProtocolFactory): outgoing connection + expected_sni (bytes): SNI that we expect the outgoing connection to send + target_server (bytes): target server that we should redirect to in the + .well-known response. + """ + # make the connection for .well-known + well_known_server = self._make_connection( + client_factory, + expected_sni=expected_sni, + ) + # check the .well-known request and send a response + self.assertEqual(len(well_known_server.requests), 1) + request = well_known_server.requests[0] + self._send_well_known_response(request, target_server) + + def _send_well_known_response(self, request, target_server): + """Check that an incoming request looks like a valid .well-known request, and + send back the response. + """ + self.assertEqual(request.method, b'GET') + self.assertEqual(request.path, b'/.well-known/matrix/server') + self.assertEqual( + request.requestHeaders.getRawHeaders(b'host'), + [b'testserv'], + ) + # send back a response + request.responseHeaders.setRawHeaders(b'Content-Type', [b'application/json']) + request.write(b'{ "m.server": "%s" }' % (target_server,)) + request.finish() + + self.reactor.pump((0.1, )) + def test_get(self): """ happy-path test of a GET request with an explicit port @@ -283,9 +331,9 @@ class MatrixFederationAgentTests(TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) - def test_get_hostname_no_srv(self): + def test_get_no_srv_no_well_known(self): """ - Test the behaviour when the server name has no port, and no SRV record + Test the behaviour when the server name has no port, no SRV, and no well-known """ self.mock_resolver.resolve_service.side_effect = lambda _: [] @@ -300,11 +348,24 @@ class MatrixFederationAgentTests(TestCase): b"_matrix._tcp.testserv", ) - # Make sure treq is trying to connect + # there should be an attempt to connect on port 443 for the .well-known clients = self.reactor.tcpClients self.assertEqual(len(clients), 1) (host, port, client_factory, _timeout, _bindAddress) = clients[0] self.assertEqual(host, '1.2.3.4') + self.assertEqual(port, 443) + + # fonx the connection + client_factory.clientConnectionFailed(None, Exception("nope")) + + # attemptdelay on the hostnameendpoint is 0.3, so takes that long before the + # .well-known request fails. 
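The fallback chain that this test (no SRV record, no reachable .well-known) walks through, condensed into a sketch. It is simplified: IP literals and explicit ports are ignored, and lookup_srv / lookup_well_known stand in for the real resolver and the .well-known fetch.

def route(host, lookup_srv, lookup_well_known, follow_well_known=True):
    # lookup_srv(name) -> list of (host, port); lookup_well_known(host) -> bytes or None
    servers = lookup_srv(b"_matrix._tcp." + host)
    if servers:
        return servers[0]                 # 1. an SRV record decides the target
    if follow_well_known:
        target = lookup_well_known(host)  # 2. otherwise follow .well-known, one hop only
        if target is not None:
            return route(target, lookup_srv, lookup_well_known, False)
    return (host, 8448)                   # 3. otherwise connect directly on the default port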
+ self.reactor.pump((0.4,)) + + # we should fall back to a direct connection + self.assertEqual(len(clients), 2) + (host, port, client_factory, _timeout, _bindAddress) = clients[1] + self.assertEqual(host, '1.2.3.4') self.assertEqual(port, 8448) # make a test server, and wire up the client @@ -327,6 +388,67 @@ class MatrixFederationAgentTests(TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) + def test_get_well_known(self): + """Test the behaviour when the server name has no port and no SRV record, but + the .well-known redirects elsewhere + """ + + self.mock_resolver.resolve_service.side_effect = lambda _: [] + self.reactor.lookups["testserv"] = "1.2.3.4" + self.reactor.lookups["target-server"] = "1::f" + + test_d = self._make_get_request(b"matrix://testserv/foo/bar") + + # Nothing happened yet + self.assertNoResult(test_d) + + self.mock_resolver.resolve_service.assert_called_once_with( + b"_matrix._tcp.testserv", + ) + self.mock_resolver.resolve_service.reset_mock() + + # there should be an attempt to connect on port 443 for the .well-known + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, '1.2.3.4') + self.assertEqual(port, 443) + + self._handle_well_known_connection( + client_factory, expected_sni=b"testserv", target_server=b"target-server", + ) + + # there should be another SRV lookup + self.mock_resolver.resolve_service.assert_called_once_with( + b"_matrix._tcp.target-server", + ) + + # now we should get a connection to the target server + self.assertEqual(len(clients), 2) + (host, port, client_factory, _timeout, _bindAddress) = clients[1] + self.assertEqual(host, '1::f') + self.assertEqual(port, 8448) + + # make a test server, and wire up the client + http_server = self._make_connection( + client_factory, + expected_sni=b'target-server', + ) + + self.assertEqual(len(http_server.requests), 1) + request = http_server.requests[0] + self.assertEqual(request.method, b'GET') + self.assertEqual(request.path, b'/foo/bar') + self.assertEqual( + request.requestHeaders.getRawHeaders(b'host'), + [b'target-server'], + ) + + # finish the request + request.finish() + self.reactor.pump((0.1,)) + self.successResultOf(test_d) + def test_get_hostname_srv(self): """ Test the behaviour when there is a single SRV record @@ -372,6 +494,71 @@ class MatrixFederationAgentTests(TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) + def test_get_well_known_srv(self): + """Test the behaviour when the server name has no port and no SRV record, but + the .well-known redirects to a place where there is a SRV. 
+ """ + + self.mock_resolver.resolve_service.side_effect = lambda _: [] + self.reactor.lookups["testserv"] = "1.2.3.4" + self.reactor.lookups["srvtarget"] = "5.6.7.8" + + test_d = self._make_get_request(b"matrix://testserv/foo/bar") + + # Nothing happened yet + self.assertNoResult(test_d) + + self.mock_resolver.resolve_service.assert_called_once_with( + b"_matrix._tcp.testserv", + ) + self.mock_resolver.resolve_service.reset_mock() + + # there should be an attempt to connect on port 443 for the .well-known + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, '1.2.3.4') + self.assertEqual(port, 443) + + self.mock_resolver.resolve_service.side_effect = lambda _: [ + Server(host=b"srvtarget", port=8443), + ] + + self._handle_well_known_connection( + client_factory, expected_sni=b"testserv", target_server=b"target-server", + ) + + # there should be another SRV lookup + self.mock_resolver.resolve_service.assert_called_once_with( + b"_matrix._tcp.target-server", + ) + + # now we should get a connection to the target of the SRV record + self.assertEqual(len(clients), 2) + (host, port, client_factory, _timeout, _bindAddress) = clients[1] + self.assertEqual(host, '5.6.7.8') + self.assertEqual(port, 8443) + + # make a test server, and wire up the client + http_server = self._make_connection( + client_factory, + expected_sni=b'target-server', + ) + + self.assertEqual(len(http_server.requests), 1) + request = http_server.requests[0] + self.assertEqual(request.method, b'GET') + self.assertEqual(request.path, b'/foo/bar') + self.assertEqual( + request.requestHeaders.getRawHeaders(b'host'), + [b'target-server'], + ) + + # finish the request + request.finish() + self.reactor.pump((0.1,)) + self.successResultOf(test_d) + def test_idna_servername(self): """test the behaviour when the server name has idna chars in""" @@ -390,11 +577,25 @@ class MatrixFederationAgentTests(TestCase): b"_matrix._tcp.xn--bcher-kva.com", ) - # Make sure treq is trying to connect + # there should be an attempt to connect on port 443 for the .well-known clients = self.reactor.tcpClients self.assertEqual(len(clients), 1) (host, port, client_factory, _timeout, _bindAddress) = clients[0] self.assertEqual(host, '1.2.3.4') + self.assertEqual(port, 443) + + # fonx the connection + client_factory.clientConnectionFailed(None, Exception("nope")) + + # attemptdelay on the hostnameendpoint is 0.3, so takes that long before the + # .well-known request fails. 
+ self.reactor.pump((0.4,)) + + # We should fall back to port 8448 + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 2) + (host, port, client_factory, _timeout, _bindAddress) = clients[1] + self.assertEqual(host, '1.2.3.4') self.assertEqual(port, 8448) # make a test server, and wire up the client @@ -492,3 +693,11 @@ def _build_test_server(): def _log_request(request): """Implements Factory.log, which is expected by Request.finish""" logger.info("Completed request %s", request) + + +@implementer(IPolicyForHTTPS) +class TrustingTLSPolicyForHTTPS(object): + """An IPolicyForHTTPS which doesn't do any certificate verification""" + def creatorForNetloc(self, hostname, port): + certificateOptions = OpenSSLCertificateOptions() + return ClientTLSOptions(hostname, certificateOptions.getContext()) diff --git a/tests/http/server.pem b/tests/http/server.pem new file mode 100644 index 0000000000..0584cf1a80 --- /dev/null +++ b/tests/http/server.pem @@ -0,0 +1,81 @@ +-----BEGIN PRIVATE KEY----- +MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQCgF43/3lAgJ+p0 +x7Rn8UcL8a4fctvdkikvZrCngw96LkB34Evfq8YGWlOVjU+f9naUJLAKMatmAfEN +r+rMX4VOXmpTwuu6iLtqwreUrRFMESyrmvQxa15p+y85gkY0CFmXMblv6ORbxHTG +ncBGwST4WK4Poewcgt6jcISFCESTUKu1zc3cw1ANIDRyDLB5K44KwIe36dcKckyN +Kdtv4BJ+3fcIZIkPJH62zqCypgFF1oiFt40uJzClxgHdJZlKYpgkfnDTckw4Y/Mx +9k8BbE310KAzUNMV9H7I1eEolzrNr66FQj1eN64X/dqO8lTbwCqAd4diCT4sIUk0 +0SVsAUjNd3g8j651hx+Qb1t8fuOjrny8dmeMxtUgIBHoQcpcj76R55Fs7KZ9uar0 +8OFTyGIze51W1jG2K/7/5M1zxIqrA+7lsXu5OR81s7I+Ng/UUAhiHA/z+42/aiNa +qEuk6tqj3rHfLctnCbtZ+JrRNqSSwEi8F0lMA021ivEd2eJV+284OyJjhXOmKHrX +QADHrmS7Sh4syTZvRNm9n+qWID0KdDr2Sji/KnS3Enp44HDQ4xriT6/xhwEGsyuX +oH5aAkdLznulbWkHBbyx1SUQSTLpOqzaioF9m1vRrLsFvrkrY3D253mPJ5eU9HM/ +dilduFcUgj4rz+6cdXUAh+KK/v95zwIDAQABAoICAFG5tJPaOa0ws0/KYx5s3YgL +aIhFalhCNSQtmCDrlwsYcXDA3/rfBchYdDL0YKGYgBBAal3J3WXFt/j0xThvyu2m +5UC9UPl4s7RckrsjXqEmY1d3UxGnbhtMT19cUdpeKN42VCP9EBaIw9Rg07dLAkSF +gNYaIx6q8F0fI4eGIPvTQtUcqur4CfWpaxyNvckdovV6M85/YXfDwbCOnacPDGIX +jfSK3i0MxGMuOHr6o8uzKR6aBUh6WStHWcw7VXXTvzdiFNbckmx3Gb93rf1b/LBw +QFfx+tBKcC62gKroCOzXso/0sL9YTVeSD/DJZOiJwSiz3Dj/3u1IUMbVvfTU8wSi +CYS7Z+jHxwSOCSSNTXm1wO/MtDsNKbI1+R0cohr/J9pOMQvrVh1+2zSDOFvXAQ1S +yvjn+uqdmijRoV2VEGVHd+34C+ci7eJGAhL/f92PohuuFR2shUETgGWzpACZSJwg +j1d90Hs81hj07vWRb+xCeDh00vimQngz9AD8vYvv/S4mqRGQ6TZdfjLoUwSTg0JD +6sQgRXX026gQhLhn687vLKZfHwzQPZkpQdxOR0dTZ/ho/RyGGRJXH4kN4cA2tPr+ +AKYQ29YXGlEzGG7OqikaZcprNWG6UFgEpuXyBxCgp9r4ladZo3J+1Rhgus8ZYatd +uO98q3WEBmP6CZ2n32mBAoIBAQDS/c/ybFTos0YpGHakwdmSfj5OOQJto2y8ywfG +qDHwO0ebcpNnS1+MA+7XbKUQb/3Iq7iJljkkzJG2DIJ6rpKynYts1ViYpM7M/t0T +W3V1gvUcUL62iqkgws4pnpWmubFkqV31cPSHcfIIclnzeQ1aOEGsGHNAvhty0ciC +DnkJACbqApvopFLOR5f6UFTtKExE+hDH0WqgpsCAKJ1L4g6pBzZatI32/CN9JEVU +tDbxLV75hHlFFjUrG7nT1rPyr/gI8Ceh9/2xeXPfjJUR0PrG3U1nwLqUCZkvFzO6 +XpN2+A+/v4v5xqMjKDKDFy1oq6SCMomwv/viw6wl/84TMbolAoIBAQDCPiMecnR8 +REik6tqVzQO/uSe9ZHjz6J15t5xdwaI6HpSwLlIkQPkLTjyXtFpemK5DOYRxrJvQ +remfrZrN2qtLlb/DKpuGPWRsPOvWCrSuNEp48ivUehtclljrzxAFfy0sM+fWeJ48 +nTnR+td9KNhjNtZixzWdAy/mE+jdaMsXVnk66L73Uz+2WsnvVMW2R6cpCR0F2eP/ +B4zDWRqlT2w47sePAB81mFYSQLvPC6Xcgg1OqMubfiizJI49c8DO6Jt+FFYdsxhd +kG52Eqa/Net6rN3ueiS6yXL5TU3Y6g96bPA2KyNCypucGcddcBfqaiVx/o4AH6yT +NrdsrYtyvk/jAoIBAQDHUwKVeeRJJbvdbQAArCV4MI155n+1xhMe1AuXkCQFWGtQ +nlBE4D72jmyf1UKnIbW2Uwv15xY6/ouVWYIWlj9+QDmMaozVP7Uiko+WDuwLRNl8 +k4dn+dzHV2HejbPBG2JLv3lFOx23q1zEwArcaXrExaq9Ayg2fKJ/uVHcFAIiD6Oz +pR1XDY4w1A/uaN+iYFSVQUyDCQLbnEz1hej73CaPZoHh9Pq83vxD5/UbjVjuRTeZ +L55FNzKpc/r89rNvTPBcuUwnxplDhYKDKVNWzn9rSXwrzTY2Tk8J3rh+k4RqevSd 
+6D47jH1n5Dy7/TRn0ueKHGZZtTUnyEUkbOJo3ayFAoIBAHKDyZaQqaX9Z8p6fwWj +yVsFoK0ih8BcWkLBAdmwZ6DWGJjJpjmjaG/G3ygc9s4gO1R8m12dAnuDnGE8KzDD +gwtbrKM2Alyg4wyA2hTlWOH/CAzH0RlCJ9Fs/d1/xJVJBeuyajLiB3/6vXTS6qnq +I7BSSxAPG8eGcn21LSsjNeB7ZZtaTgNnu/8ZBUYo9yrgkWc67TZe3/ChldYxOOlO +qqHh/BqNWtjxB4VZTp/g4RbgQVInZ2ozdXEv0v/dt0UEk29ANAjsZif7F3RayJ2f +/0TilzCaJ/9K9pKNhaClVRy7Dt8QjYg6BIWCGSw4ApF7pLnQ9gySn95mersCkVzD +YDsCggEAb0E/TORjQhKfNQvahyLfQFm151e+HIoqBqa4WFyfFxe/IJUaLH/JSSFw +VohbQqPdCmaAeuQ8ERL564DdkcY5BgKcax79fLLCOYP5bT11aQx6uFpfl2Dcm6Z9 +QdCRI4jzPftsd5fxLNH1XtGyC4t6vTic4Pji2O71WgWzx0j5v4aeDY4sZQeFxqCV +/q7Ee8hem1Rn5RFHu14FV45RS4LAWl6wvf5pQtneSKzx8YL0GZIRRytOzdEfnGKr +FeUlAj5uL+5/p0ZEgM7gPsEBwdm8scF79qSUn8UWSoXNeIauF9D4BDg8RZcFFxka +KILVFsq3cQC+bEnoM4eVbjEQkGs1RQ== +-----END PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +MIIE/jCCAuagAwIBAgIJANFtVaGvJWZlMA0GCSqGSIb3DQEBCwUAMBMxETAPBgNV +BAMMCHRlc3RzZXJ2MCAXDTE5MDEyNzIyMDIzNloYDzIxMTkwMTAzMjIwMjM2WjAT +MREwDwYDVQQDDAh0ZXN0c2VydjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoC +ggIBAKAXjf/eUCAn6nTHtGfxRwvxrh9y292SKS9msKeDD3ouQHfgS9+rxgZaU5WN +T5/2dpQksAoxq2YB8Q2v6sxfhU5ealPC67qIu2rCt5StEUwRLKua9DFrXmn7LzmC +RjQIWZcxuW/o5FvEdMadwEbBJPhYrg+h7ByC3qNwhIUIRJNQq7XNzdzDUA0gNHIM +sHkrjgrAh7fp1wpyTI0p22/gEn7d9whkiQ8kfrbOoLKmAUXWiIW3jS4nMKXGAd0l +mUpimCR+cNNyTDhj8zH2TwFsTfXQoDNQ0xX0fsjV4SiXOs2vroVCPV43rhf92o7y +VNvAKoB3h2IJPiwhSTTRJWwBSM13eDyPrnWHH5BvW3x+46OufLx2Z4zG1SAgEehB +ylyPvpHnkWzspn25qvTw4VPIYjN7nVbWMbYr/v/kzXPEiqsD7uWxe7k5HzWzsj42 +D9RQCGIcD/P7jb9qI1qoS6Tq2qPesd8ty2cJu1n4mtE2pJLASLwXSUwDTbWK8R3Z +4lX7bzg7ImOFc6YoetdAAMeuZLtKHizJNm9E2b2f6pYgPQp0OvZKOL8qdLcSenjg +cNDjGuJPr/GHAQazK5egfloCR0vOe6VtaQcFvLHVJRBJMuk6rNqKgX2bW9GsuwW+ +uStjcPbneY8nl5T0cz92KV24VxSCPivP7px1dQCH4or+/3nPAgMBAAGjUzBRMB0G +A1UdDgQWBBQcQZpzLzTk5KdS/Iz7sGCV7gTd/zAfBgNVHSMEGDAWgBQcQZpzLzTk +5KdS/Iz7sGCV7gTd/zAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4IC +AQAr/Pgha57jqYsDDX1LyRrVdqoVBpLBeB7x/p9dKYm7S6tBTDFNMZ0SZyQP8VEG +7UoC9/OQ9nCdEMoR7ZKpQsmipwcIqpXHS6l4YOkf5EEq5jpMgvlEesHmBJJeJew/ +FEPDl1bl8d0tSrmWaL3qepmwzA+2lwAAouWk2n+rLiP8CZ3jZeoTXFqYYrUlEqO9 +fHMvuWqTV4KCSyNY+GWCrnHetulgKHlg+W2J1mZnrCKcBhWf9C2DesTJO+JldIeM +ornTFquSt21hZi+k3aySuMn2N3MWiNL8XsZVsAnPSs0zA+2fxjJkShls8Gc7cCvd +a6XrNC+PY6pONguo7rEU4HiwbvnawSTngFFglmH/ImdA/HkaAekW6o82aI8/UxFx +V9fFMO3iKDQdOrg77hI1bx9RlzKNZZinE2/Pu26fWd5d2zqDWCjl8ykGQRAfXgYN +H3BjgyXLl+ao5/pOUYYtzm3ruTXTgRcy5hhL6hVTYhSrf9vYh4LNIeXNKnZ78tyG +TX77/kU2qXhBGCFEUUMqUNV/+ITir2lmoxVjknt19M07aGr8C7SgYt6Rs+qDpMiy +JurgvRh8LpVq4pHx1efxzxCFmo58DMrG40I0+CF3y/niNpOb1gp2wAqByRiORkds +f0ytW6qZ0TpHbD6gOtQLYDnhx3ISuX+QYSekVwQUpffeWQ== +-----END CERTIFICATE----- diff --git a/tests/server.py b/tests/server.py index 6adcc73f91..3d7ae9875c 100644 --- a/tests/server.py +++ b/tests/server.py @@ -354,6 +354,11 @@ class FakeTransport(object): :type: twisted.internet.interfaces.IReactorTime """ + _protocol = attr.ib(default=None) + """The Protocol which is producing data for this transport. Optional, but if set + will get called back for connectionLost() notifications etc. 
+ """ + disconnecting = False buffer = attr.ib(default=b'') producer = attr.ib(default=None) @@ -364,8 +369,12 @@ class FakeTransport(object): def getHost(self): return None - def loseConnection(self): - self.disconnecting = True + def loseConnection(self, reason=None): + logger.info("FakeTransport: loseConnection(%s)", reason) + if not self.disconnecting: + self.disconnecting = True + if self._protocol: + self._protocol.connectionLost(reason) def abortConnection(self): self.disconnecting = True -- cgit 1.5.1 From bc5f6e1797057e3acea692b926305982390424a3 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 30 Jan 2019 10:55:25 +0000 Subject: Add a caching layer to .well-known responses (#4516) --- changelog.d/4516.feature | 1 + synapse/http/federation/matrix_federation_agent.py | 90 +++++++++++- synapse/util/caches/ttlcache.py | 161 +++++++++++++++++++++ .../federation/test_matrix_federation_agent.py | 150 ++++++++++++++++++- tests/server.py | 18 ++- tests/util/caches/test_ttlcache.py | 83 +++++++++++ 6 files changed, 493 insertions(+), 10 deletions(-) create mode 100644 changelog.d/4516.feature create mode 100644 synapse/util/caches/ttlcache.py create mode 100644 tests/util/caches/test_ttlcache.py (limited to 'tests/server.py') diff --git a/changelog.d/4516.feature b/changelog.d/4516.feature new file mode 100644 index 0000000000..bda713adf9 --- /dev/null +++ b/changelog.d/4516.feature @@ -0,0 +1 @@ +Implement MSC1708 (.well-known routing for server-server federation) \ No newline at end of file diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index f81fcd4301..29804efe10 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -14,6 +14,8 @@ # limitations under the License. 
import json import logging +import random +import time import attr from netaddr import IPAddress @@ -22,13 +24,29 @@ from zope.interface import implementer from twisted.internet import defer from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS from twisted.web.client import URI, Agent, HTTPConnectionPool, readBody +from twisted.web.http import stringToDatetime from twisted.web.http_headers import Headers from twisted.web.iweb import IAgent from synapse.http.federation.srv_resolver import SrvResolver, pick_server_from_list +from synapse.util.caches.ttlcache import TTLCache from synapse.util.logcontext import make_deferred_yieldable +# period to cache .well-known results for by default +WELL_KNOWN_DEFAULT_CACHE_PERIOD = 24 * 3600 + +# jitter to add to the .well-known default cache ttl +WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER = 10 * 60 + +# period to cache failure to fetch .well-known for +WELL_KNOWN_INVALID_CACHE_PERIOD = 1 * 3600 + +# cap for .well-known cache period +WELL_KNOWN_MAX_CACHE_PERIOD = 48 * 3600 + + logger = logging.getLogger(__name__) +well_known_cache = TTLCache('well-known') @implementer(IAgent) @@ -57,6 +75,7 @@ class MatrixFederationAgent(object): self, reactor, tls_client_options_factory, _well_known_tls_policy=None, _srv_resolver=None, + _well_known_cache=well_known_cache, ): self._reactor = reactor self._tls_client_options_factory = tls_client_options_factory @@ -77,6 +96,8 @@ class MatrixFederationAgent(object): _well_known_agent = Agent(self._reactor, pool=self._pool, **agent_args) self._well_known_agent = _well_known_agent + self._well_known_cache = _well_known_cache + @defer.inlineCallbacks def request(self, method, uri, headers=None, bodyProducer=None): """ @@ -259,7 +280,14 @@ class MatrixFederationAgent(object): Deferred[bytes|None]: either the new server name, from the .well-known, or None if there was no .well-known file. """ - # FIXME: add a cache + try: + cached = self._well_known_cache[server_name] + defer.returnValue(cached) + except KeyError: + pass + + # TODO: should we linearise so that we don't end up doing two .well-known requests + # for the same server in parallel? 
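The TODO above asks about linearising concurrent .well-known fetches for the same server. One possible shape for that, sketched purely for illustration: it is not part of this change, the class name is invented, and error handling is omitted (a failing fetch would leave later waiters pending).

from twisted.internet import defer

class _Lineariser(object):
    def __init__(self, fetch):
        self._fetch = fetch      # fetch(key) -> Deferred
        self._waiters = {}       # key -> [Deferred, ...] waiting on an in-flight fetch

    def get(self, key):
        if key in self._waiters:
            # a fetch for this key is already running: wait for its result
            d = defer.Deferred()
            self._waiters[key].append(d)
            return d

        self._waiters[key] = []

        def _done(result):
            for waiter in self._waiters.pop(key):
                waiter.callback(result)
            return result

        return self._fetch(key).addCallback(_done)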
uri = b"https://%s/.well-known/matrix/server" % (server_name, ) uri_str = uri.decode("ascii") @@ -270,12 +298,14 @@ class MatrixFederationAgent(object): ) except Exception as e: logger.info("Connection error fetching %s: %s", uri_str, e) + self._well_known_cache.set(server_name, None, WELL_KNOWN_INVALID_CACHE_PERIOD) defer.returnValue(None) body = yield make_deferred_yieldable(readBody(response)) if response.code != 200: logger.info("Error response %i from %s", response.code, uri_str) + self._well_known_cache.set(server_name, None, WELL_KNOWN_INVALID_CACHE_PERIOD) defer.returnValue(None) try: @@ -287,7 +317,63 @@ class MatrixFederationAgent(object): raise Exception("Missing key 'm.server'") except Exception as e: raise Exception("invalid .well-known response from %s: %s" % (uri_str, e,)) - defer.returnValue(parsed_body["m.server"].encode("ascii")) + + result = parsed_body["m.server"].encode("ascii") + + cache_period = _cache_period_from_headers( + response.headers, + time_now=self._reactor.seconds, + ) + if cache_period is None: + cache_period = WELL_KNOWN_DEFAULT_CACHE_PERIOD + # add some randomness to the TTL to avoid a stampeding herd every hour after + # startup + cache_period += random.uniform(0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER) + else: + cache_period = min(cache_period, WELL_KNOWN_MAX_CACHE_PERIOD) + + if cache_period > 0: + self._well_known_cache.set(server_name, result, cache_period) + + defer.returnValue(result) + + +def _cache_period_from_headers(headers, time_now=time.time): + cache_controls = _parse_cache_control(headers) + + if b'no-store' in cache_controls: + return 0 + + if b'max-age' in cache_controls: + try: + max_age = int(cache_controls[b'max-age']) + return max_age + except ValueError: + pass + + expires = headers.getRawHeaders(b'expires') + if expires is not None: + try: + expires_date = stringToDatetime(expires[-1]) + return expires_date - time_now() + except ValueError: + # RFC7234 says 'A cache recipient MUST interpret invalid date formats, + # especially the value "0", as representing a time in the past (i.e., + # "already expired"). + return 0 + + return None + + +def _parse_cache_control(headers): + cache_controls = {} + for hdr in headers.getRawHeaders(b'cache-control', []): + for directive in hdr.split(b','): + splits = [x.strip() for x in directive.split(b'=', 1)] + k = splits[0].lower() + v = splits[1] if len(splits) > 1 else None + cache_controls[k] = v + return cache_controls @attr.s diff --git a/synapse/util/caches/ttlcache.py b/synapse/util/caches/ttlcache.py new file mode 100644 index 0000000000..5ba1862506 --- /dev/null +++ b/synapse/util/caches/ttlcache.py @@ -0,0 +1,161 @@ +# -*- coding: utf-8 -*- +# Copyright 2015, 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +import time + +import attr +from sortedcontainers import SortedList + +from synapse.util.caches import register_cache + +logger = logging.getLogger(__name__) + +SENTINEL = object() + + +class TTLCache(object): + """A key/value cache implementation where each entry has its own TTL""" + + def __init__(self, cache_name, timer=time.time): + # map from key to _CacheEntry + self._data = {} + + # the _CacheEntries, sorted by expiry time + self._expiry_list = SortedList() + + self._timer = timer + + self._metrics = register_cache("ttl", cache_name, self) + + def set(self, key, value, ttl): + """Add/update an entry in the cache + + Args: + key: key for this entry + value: value for this entry + ttl (float): TTL for this entry, in seconds + """ + expiry = self._timer() + ttl + + self.expire() + e = self._data.pop(key, SENTINEL) + if e != SENTINEL: + self._expiry_list.remove(e) + + entry = _CacheEntry(expiry_time=expiry, key=key, value=value) + self._data[key] = entry + self._expiry_list.add(entry) + + def get(self, key, default=SENTINEL): + """Get a value from the cache + + Args: + key: key to look up + default: default value to return, if key is not found. If not set, and the + key is not found, a KeyError will be raised + + Returns: + value from the cache, or the default + """ + self.expire() + e = self._data.get(key, SENTINEL) + if e == SENTINEL: + self._metrics.inc_misses() + if default == SENTINEL: + raise KeyError(key) + return default + self._metrics.inc_hits() + return e.value + + def get_with_expiry(self, key): + """Get a value, and its expiry time, from the cache + + Args: + key: key to look up + + Returns: + Tuple[Any, float]: the value from the cache, and the expiry time + + Raises: + KeyError if the entry is not found + """ + self.expire() + try: + e = self._data[key] + except KeyError: + self._metrics.inc_misses() + raise + self._metrics.inc_hits() + return e.value, e.expiry_time + + def pop(self, key, default=SENTINEL): + """Remove a value from the cache + + If key is in the cache, remove it and return its value, else return default. + If default is not given and key is not in the cache, a KeyError is raised. + + Args: + key: key to look up + default: default value to return, if key is not found. If not set, and the + key is not found, a KeyError will be raised + + Returns: + value from the cache, or the default + """ + self.expire() + e = self._data.pop(key, SENTINEL) + if e == SENTINEL: + self._metrics.inc_misses() + if default == SENTINEL: + raise KeyError(key) + return default + self._expiry_list.remove(e) + self._metrics.inc_hits() + return e.value + + def __getitem__(self, key): + return self.get(key) + + def __delitem__(self, key): + self.pop(key) + + def __contains__(self, key): + return key in self._data + + def __len__(self): + self.expire() + return len(self._data) + + def expire(self): + """Run the expiry on the cache. Any entries whose expiry times are due will + be removed + """ + now = self._timer() + while self._expiry_list: + first_entry = self._expiry_list[0] + if first_entry.expiry_time - now > 0.0: + break + del self._data[first_entry.key] + del self._expiry_list[0] + + +@attr.s(frozen=True, slots=True) +class _CacheEntry(object): + """TTLCache entry""" + # expiry_time is the first attribute, so that entries are sorted by expiry. 
+ expiry_time = attr.ib() + key = attr.ib() + value = attr.ib() diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index 11ea8ef10c..fe459ea6e3 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -24,11 +24,16 @@ from twisted.internet._sslverify import ClientTLSOptions, OpenSSLCertificateOpti from twisted.internet.protocol import Factory from twisted.protocols.tls import TLSMemoryBIOFactory from twisted.web.http import HTTPChannel +from twisted.web.http_headers import Headers from twisted.web.iweb import IPolicyForHTTPS from synapse.crypto.context_factory import ClientTLSOptionsFactory -from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent +from synapse.http.federation.matrix_federation_agent import ( + MatrixFederationAgent, + _cache_period_from_headers, +) from synapse.http.federation.srv_resolver import Server +from synapse.util.caches.ttlcache import TTLCache from synapse.util.logcontext import LoggingContext from tests.http import ServerTLSContext @@ -44,11 +49,14 @@ class MatrixFederationAgentTests(TestCase): self.mock_resolver = Mock() + self.well_known_cache = TTLCache("test_cache", timer=self.reactor.seconds) + self.agent = MatrixFederationAgent( reactor=self.reactor, tls_client_options_factory=ClientTLSOptionsFactory(None), _well_known_tls_policy=TrustingTLSPolicyForHTTPS(), _srv_resolver=self.mock_resolver, + _well_known_cache=self.well_known_cache, ) def _make_connection(self, client_factory, expected_sni): @@ -115,7 +123,9 @@ class MatrixFederationAgentTests(TestCase): finally: _check_logcontext(context) - def _handle_well_known_connection(self, client_factory, expected_sni, target_server): + def _handle_well_known_connection( + self, client_factory, expected_sni, target_server, response_headers={}, + ): """Handle an outgoing HTTPs connection: wire it up to a server, check that the request is for a .well-known, and send the response. @@ -124,6 +134,8 @@ class MatrixFederationAgentTests(TestCase): expected_sni (bytes): SNI that we expect the outgoing connection to send target_server (bytes): target server that we should redirect to in the .well-known response. + Returns: + HTTPChannel: server impl """ # make the connection for .well-known well_known_server = self._make_connection( @@ -133,9 +145,10 @@ class MatrixFederationAgentTests(TestCase): # check the .well-known request and send a response self.assertEqual(len(well_known_server.requests), 1) request = well_known_server.requests[0] - self._send_well_known_response(request, target_server) + self._send_well_known_response(request, target_server, headers=response_headers) + return well_known_server - def _send_well_known_response(self, request, target_server): + def _send_well_known_response(self, request, target_server, headers={}): """Check that an incoming request looks like a valid .well-known request, and send back the response. 
""" @@ -146,6 +159,8 @@ class MatrixFederationAgentTests(TestCase): [b'testserv'], ) # send back a response + for k, v in headers.items(): + request.setHeader(k, v) request.write(b'{ "m.server": "%s" }' % (target_server,)) request.finish() @@ -448,6 +463,13 @@ class MatrixFederationAgentTests(TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) + self.assertEqual(self.well_known_cache[b"testserv"], b"target-server") + + # check the cache expires + self.reactor.pump((25 * 3600,)) + self.well_known_cache.expire() + self.assertNotIn(b"testserv", self.well_known_cache) + def test_get_hostname_srv(self): """ Test the behaviour when there is a single SRV record @@ -661,6 +683,126 @@ class MatrixFederationAgentTests(TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) + @defer.inlineCallbacks + def do_get_well_known(self, serv): + try: + result = yield self.agent._get_well_known(serv) + logger.info("Result from well-known fetch: %s", result) + except Exception as e: + logger.warning("Error fetching well-known: %s", e) + raise + defer.returnValue(result) + + def test_well_known_cache(self): + self.reactor.lookups["testserv"] = "1.2.3.4" + + fetch_d = self.do_get_well_known(b'testserv') + + # there should be an attempt to connect on port 443 for the .well-known + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0) + self.assertEqual(host, '1.2.3.4') + self.assertEqual(port, 443) + + well_known_server = self._handle_well_known_connection( + client_factory, + expected_sni=b"testserv", + response_headers={b'Cache-Control': b'max-age=10'}, + target_server=b"target-server", + ) + + r = self.successResultOf(fetch_d) + self.assertEqual(r, b'target-server') + + # close the tcp connection + well_known_server.loseConnection() + + # repeat the request: it should hit the cache + fetch_d = self.do_get_well_known(b'testserv') + r = self.successResultOf(fetch_d) + self.assertEqual(r, b'target-server') + + # expire the cache + self.reactor.pump((10.0,)) + + # now it should connect again + fetch_d = self.do_get_well_known(b'testserv') + + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0) + self.assertEqual(host, '1.2.3.4') + self.assertEqual(port, 443) + + self._handle_well_known_connection( + client_factory, + expected_sni=b"testserv", + target_server=b"other-server", + ) + + r = self.successResultOf(fetch_d) + self.assertEqual(r, b'other-server') + + +class TestCachePeriodFromHeaders(TestCase): + def test_cache_control(self): + # uppercase + self.assertEqual( + _cache_period_from_headers( + Headers({b'Cache-Control': [b'foo, Max-Age = 100, bar']}), + ), 100, + ) + + # missing value + self.assertIsNone(_cache_period_from_headers( + Headers({b'Cache-Control': [b'max-age=, bar']}), + )) + + # hackernews: bogus due to semicolon + self.assertIsNone(_cache_period_from_headers( + Headers({b'Cache-Control': [b'private; max-age=0']}), + )) + + # github + self.assertEqual( + _cache_period_from_headers( + Headers({b'Cache-Control': [b'max-age=0, private, must-revalidate']}), + ), 0, + ) + + # google + self.assertEqual( + _cache_period_from_headers( + Headers({b'cache-control': [b'private, max-age=0']}), + ), 0, + ) + + def test_expires(self): + self.assertEqual( + _cache_period_from_headers( + Headers({b'Expires': [b'Wed, 30 Jan 2019 07:35:33 GMT']}), + time_now=lambda: 1548833700 + ), 33, + ) + + # cache-control overrides expires + self.assertEqual( + 
_cache_period_from_headers( + Headers({ + b'cache-control': [b'max-age=10'], + b'Expires': [b'Wed, 30 Jan 2019 07:35:33 GMT'] + }), + time_now=lambda: 1548833700 + ), 10, + ) + + # invalid expires means immediate expiry + self.assertEqual( + _cache_period_from_headers( + Headers({b'Expires': [b'0']}), + ), 0, + ) + def _check_logcontext(context): current = LoggingContext.current_context() diff --git a/tests/server.py b/tests/server.py index 3d7ae9875c..fc1e76d146 100644 --- a/tests/server.py +++ b/tests/server.py @@ -360,6 +360,7 @@ class FakeTransport(object): """ disconnecting = False + disconnected = False buffer = attr.ib(default=b'') producer = attr.ib(default=None) @@ -370,14 +371,16 @@ class FakeTransport(object): return None def loseConnection(self, reason=None): - logger.info("FakeTransport: loseConnection(%s)", reason) if not self.disconnecting: + logger.info("FakeTransport: loseConnection(%s)", reason) self.disconnecting = True if self._protocol: self._protocol.connectionLost(reason) + self.disconnected = True def abortConnection(self): - self.disconnecting = True + logger.info("FakeTransport: abortConnection()") + self.loseConnection() def pauseProducing(self): if not self.producer: @@ -416,9 +419,16 @@ class FakeTransport(object): # TLSMemoryBIOProtocol return + if self.disconnected: + return + logger.info("%s->%s: %s", self._protocol, self.other, self.buffer) + if getattr(self.other, "transport") is not None: - self.other.dataReceived(self.buffer) - self.buffer = b"" + try: + self.other.dataReceived(self.buffer) + self.buffer = b"" + except Exception as e: + logger.warning("Exception writing to protocol: %s", e) return self._reactor.callLater(0.0, _write) diff --git a/tests/util/caches/test_ttlcache.py b/tests/util/caches/test_ttlcache.py new file mode 100644 index 0000000000..03b3c15db6 --- /dev/null +++ b/tests/util/caches/test_ttlcache.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
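Before the full test case below, a condensed standalone sketch of the TTLCache API it exercises. The key and value here are made-up examples, and the one-element list is just a trivially controllable timer.

from synapse.util.caches.ttlcache import TTLCache

now = [100.0]
cache = TTLCache("example", timer=lambda: now[0])

cache.set("example.com", b"target-server", ttl=10)
assert cache["example.com"] == b"target-server"
assert cache.get_with_expiry("example.com") == (b"target-server", 110.0)

now[0] = 111.0                                  # step past the expiry time
assert cache.get("example.com", None) is None   # get() runs the expiry sweep
assert "example.com" not in cache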
+ +from mock import Mock + +from synapse.util.caches.ttlcache import TTLCache + +from tests import unittest + + +class CacheTestCase(unittest.TestCase): + def setUp(self): + self.mock_timer = Mock(side_effect=lambda: 100.0) + self.cache = TTLCache("test_cache", self.mock_timer) + + def test_get(self): + """simple set/get tests""" + self.cache.set('one', '1', 10) + self.cache.set('two', '2', 20) + self.cache.set('three', '3', 30) + + self.assertEqual(len(self.cache), 3) + + self.assertTrue('one' in self.cache) + self.assertEqual(self.cache.get('one'), '1') + self.assertEqual(self.cache['one'], '1') + self.assertEqual(self.cache.get_with_expiry('one'), ('1', 110)) + self.assertEqual(self.cache._metrics.hits, 3) + self.assertEqual(self.cache._metrics.misses, 0) + + self.cache.set('two', '2.5', 20) + self.assertEqual(self.cache['two'], '2.5') + self.assertEqual(self.cache._metrics.hits, 4) + + # non-existent-item tests + self.assertEqual(self.cache.get('four', '4'), '4') + self.assertIs(self.cache.get('four', None), None) + + with self.assertRaises(KeyError): + self.cache['four'] + + with self.assertRaises(KeyError): + self.cache.get('four') + + with self.assertRaises(KeyError): + self.cache.get_with_expiry('four') + + self.assertEqual(self.cache._metrics.hits, 4) + self.assertEqual(self.cache._metrics.misses, 5) + + def test_expiry(self): + self.cache.set('one', '1', 10) + self.cache.set('two', '2', 20) + self.cache.set('three', '3', 30) + + self.assertEqual(len(self.cache), 3) + self.assertEqual(self.cache['one'], '1') + self.assertEqual(self.cache['two'], '2') + + # enough for the first entry to expire, but not the rest + self.mock_timer.side_effect = lambda: 110.0 + + self.assertEqual(len(self.cache), 2) + self.assertFalse('one' in self.cache) + self.assertEqual(self.cache['two'], '2') + self.assertEqual(self.cache['three'], '3') + + self.assertEqual(self.cache.get_with_expiry('two'), ('2', 120)) + + self.assertEqual(self.cache._metrics.hits, 5) + self.assertEqual(self.cache._metrics.misses, 0) -- cgit 1.5.1
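As a closing illustration, two of the header-parsing checks from the tests above, extracted so they can be run on their own; the inputs and expected values are taken verbatim from the test cases.

from twisted.web.http_headers import Headers

from synapse.http.federation.matrix_federation_agent import _cache_period_from_headers

# max-age=0 yields a cache period of 0 (the "google" case in the tests above)
assert _cache_period_from_headers(
    Headers({b'cache-control': [b'private, max-age=0']}),
) == 0

# with only an Expires header, the period is measured against time_now()
assert _cache_period_from_headers(
    Headers({b'Expires': [b'Wed, 30 Jan 2019 07:35:33 GMT']}),
    time_now=lambda: 1548833700,
) == 33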