diff options
Diffstat (limited to '')
68 files changed, 1415 insertions, 689 deletions
diff --git a/synapse/__init__.py b/synapse/__init__.py index e62901b761..9dbe0b9f10 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -17,4 +17,14 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.33.3" +try: + from twisted.internet import protocol + from twisted.internet.protocol import Factory + from twisted.names.dns import DNSDatagramProtocol + protocol.Factory.noisy = False + Factory.noisy = False + DNSDatagramProtocol.noisy = False +except ImportError: + pass + +__version__ = "0.33.4" diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 186831e118..a31a9a17e0 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -251,6 +251,7 @@ class FilterCollection(object): "include_leave", False ) self.event_fields = filter_json.get("event_fields", []) + self.event_format = filter_json.get("event_format", "client") def __repr__(self): return "<FilterCollection %s>" % (json.dumps(self._filter_json),) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 3eb5b663de..3241ded188 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -307,6 +307,10 @@ class SynapseHomeServer(HomeServer): # Gauges to expose monthly active user control metrics current_mau_gauge = Gauge("synapse_admin_mau:current", "Current MAU") max_mau_gauge = Gauge("synapse_admin_mau:max", "MAU Limit") +registered_reserved_users_mau_gauge = Gauge( + "synapse_admin_mau:registered_reserved_users", + "Registered users with reserved threepids" +) def setup(config_options): @@ -453,6 +457,10 @@ def run(hs): stats["homeserver"] = hs.config.server_name stats["timestamp"] = now stats["uptime_seconds"] = uptime + version = sys.version_info + stats["python_version"] = "{}.{}.{}".format( + version.major, version.minor, version.micro + ) stats["total_users"] = yield hs.get_datastore().count_all_users() total_nonbridged_users = yield hs.get_datastore().count_nonbridged_users() @@ -531,10 +539,14 @@ def run(hs): @defer.inlineCallbacks def generate_monthly_active_users(): - count = 0 + current_mau_count = 0 + reserved_count = 0 + store = hs.get_datastore() if hs.config.limit_usage_by_mau: - count = yield hs.get_datastore().get_monthly_active_count() - current_mau_gauge.set(float(count)) + current_mau_count = yield store.get_monthly_active_count() + reserved_count = yield store.get_registered_reserved_users_count() + current_mau_gauge.set(float(current_mau_count)) + registered_reserved_users_mau_gauge.set(float(reserved_count)) max_mau_gauge.set(float(hs.config.max_mau_value)) hs.get_datastore().initialise_reserved_users( diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index 6980e5890e..9ccc5a80fc 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -13,7 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -import urllib + +from six.moves import urllib from prometheus_client import Counter @@ -98,7 +99,7 @@ class ApplicationServiceApi(SimpleHttpClient): def query_user(self, service, user_id): if service.url is None: defer.returnValue(False) - uri = service.url + ("/users/%s" % urllib.quote(user_id)) + uri = service.url + ("/users/%s" % urllib.parse.quote(user_id)) response = None try: response = yield self.get_json(uri, { @@ -119,7 +120,7 @@ class ApplicationServiceApi(SimpleHttpClient): def query_alias(self, service, alias): if service.url is None: defer.returnValue(False) - uri = service.url + ("/rooms/%s" % urllib.quote(alias)) + uri = service.url + ("/rooms/%s" % urllib.parse.quote(alias)) response = None try: response = yield self.get_json(uri, { @@ -153,7 +154,7 @@ class ApplicationServiceApi(SimpleHttpClient): service.url, APP_SERVICE_PREFIX, kind, - urllib.quote(protocol) + urllib.parse.quote(protocol) ) try: response = yield self.get_json(uri, fields) @@ -188,7 +189,7 @@ class ApplicationServiceApi(SimpleHttpClient): uri = "%s%s/thirdparty/protocol/%s" % ( service.url, APP_SERVICE_PREFIX, - urllib.quote(protocol) + urllib.parse.quote(protocol) ) try: info = yield self.get_json(uri, {}) @@ -228,7 +229,7 @@ class ApplicationServiceApi(SimpleHttpClient): txn_id = str(txn_id) uri = service.url + ("/transactions/%s" % - urllib.quote(txn_id)) + urllib.parse.quote(txn_id)) try: yield self.put_json( uri=uri, diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index 2fd9c48abf..b8d5690f2b 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -21,7 +21,7 @@ from .consent_config import ConsentConfig from .database import DatabaseConfig from .emailconfig import EmailConfig from .groups import GroupsConfig -from .jwt import JWTConfig +from .jwt_config import JWTConfig from .key import KeyConfig from .logger import LoggingConfig from .metrics import MetricsConfig diff --git a/synapse/config/jwt.py b/synapse/config/jwt_config.py index 51e7f7e003..51e7f7e003 100644 --- a/synapse/config/jwt.py +++ b/synapse/config/jwt_config.py diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 3f187adfc8..e9a936118d 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -227,7 +227,22 @@ def setup_logging(config, use_worker_options=False): # # However this may not be too much of a problem if we are just writing to a file. observer = STDLibLogObserver() + + def _log(event): + + if "log_text" in event: + if event["log_text"].startswith("DNSDatagramProtocol starting on "): + return + + if event["log_text"].startswith("(UDP Port "): + return + + if event["log_text"].startswith("Timing out client"): + return + + return observer(event) + globalLogBeginner.beginLoggingTo( - [observer], + [_log], redirectStandardIO=not config.no_redirect_stdio, ) diff --git a/synapse/crypto/context_factory.py b/synapse/crypto/context_factory.py index 1a391adec1..02b76dfcfb 100644 --- a/synapse/crypto/context_factory.py +++ b/synapse/crypto/context_factory.py @@ -123,6 +123,6 @@ class ClientTLSOptionsFactory(object): def get_options(self, host): return ClientTLSOptions( - host.decode('utf-8'), + host, CertificateOptions(verify=False).getContext() ) diff --git a/synapse/crypto/keyclient.py b/synapse/crypto/keyclient.py index e94400b8e2..57d4665e84 100644 --- a/synapse/crypto/keyclient.py +++ b/synapse/crypto/keyclient.py @@ -50,7 +50,7 @@ def fetch_server_key(server_name, tls_client_options_factory, path=KEY_API_V1): defer.returnValue((server_response, server_certificate)) except SynapseKeyClientError as e: logger.warn("Error getting key for %r: %s", server_name, e) - if e.status.startswith("4"): + if e.status.startswith(b"4"): # Don't retry for 4xx responses. raise IOError("Cannot get key for %r" % server_name) except (ConnectError, DomainError) as e: @@ -82,6 +82,12 @@ class SynapseKeyClientProtocol(HTTPClient): self._peer = self.transport.getPeer() logger.debug("Connected to %s", self._peer) + if not isinstance(self.path, bytes): + self.path = self.path.encode('ascii') + + if not isinstance(self.host, bytes): + self.host = self.host.encode('ascii') + self.sendCommand(b"GET", self.path) if self.host: self.sendHeader(b"Host", self.host) diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index 30e2742102..d89f94c219 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -16,9 +16,10 @@ import hashlib import logging -import urllib from collections import namedtuple +from six.moves import urllib + from signedjson.key import ( decode_verify_key_bytes, encode_verify_key_base64, @@ -40,6 +41,7 @@ from synapse.api.errors import Codes, SynapseError from synapse.crypto.keyclient import fetch_server_key from synapse.util import logcontext, unwrapFirstError from synapse.util.logcontext import ( + LoggingContext, PreserveLoggingContext, preserve_fn, run_in_background, @@ -216,23 +218,34 @@ class Keyring(object): servers have completed. Follows the synapse rules of logcontext preservation. """ + loop_count = 1 while True: wait_on = [ - self.key_downloads[server_name] + (server_name, self.key_downloads[server_name]) for server_name in server_names if server_name in self.key_downloads ] - if wait_on: - with PreserveLoggingContext(): - yield defer.DeferredList(wait_on) - else: + if not wait_on: break + logger.info( + "Waiting for existing lookups for %s to complete [loop %i]", + [w[0] for w in wait_on], loop_count, + ) + with PreserveLoggingContext(): + yield defer.DeferredList((w[1] for w in wait_on)) + + loop_count += 1 + + ctx = LoggingContext.current_context() def rm(r, server_name_): - self.key_downloads.pop(server_name_, None) + with PreserveLoggingContext(ctx): + logger.debug("Releasing key lookup lock on %s", server_name_) + self.key_downloads.pop(server_name_, None) return r for server_name, deferred in server_to_deferred.items(): + logger.debug("Got key lookup lock on %s", server_name) self.key_downloads[server_name] = deferred deferred.addBoth(rm, server_name) @@ -432,7 +445,7 @@ class Keyring(object): # an incoming request. query_response = yield self.client.post_json( destination=perspective_name, - path=b"/_matrix/key/v2/query", + path="/_matrix/key/v2/query", data={ u"server_keys": { server_name: { @@ -513,8 +526,8 @@ class Keyring(object): (response, tls_certificate) = yield fetch_server_key( server_name, self.hs.tls_client_options_factory, - path=(b"/_matrix/key/v2/server/%s" % ( - urllib.quote(requested_key_id), + path=("/_matrix/key/v2/server/%s" % ( + urllib.parse.quote(requested_key_id), )).encode("ascii"), ) diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 51f9084b90..b782af6308 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import six + from synapse.util.caches import intern_dict from synapse.util.frozenutils import freeze @@ -147,6 +149,9 @@ class EventBase(object): def items(self): return list(self._event_dict.items()) + def keys(self): + return six.iterkeys(self._event_dict) + class FrozenEvent(EventBase): def __init__(self, event_dict, internal_metadata_dict={}, rejected_reason=None): diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index c11798093d..b7ad729c63 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -13,17 +13,20 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +from collections import namedtuple import six from twisted.internet import defer +from twisted.internet.defer import DeferredList -from synapse.api.constants import MAX_DEPTH +from synapse.api.constants import MAX_DEPTH, EventTypes, Membership from synapse.api.errors import Codes, SynapseError from synapse.crypto.event_signing import check_event_content_hash from synapse.events import FrozenEvent from synapse.events.utils import prune_event from synapse.http.servlet import assert_params_in_dict +from synapse.types import get_domain_from_id from synapse.util import logcontext, unwrapFirstError logger = logging.getLogger(__name__) @@ -133,34 +136,45 @@ class FederationBase(object): * throws a SynapseError if the signature check failed. The deferreds run their callbacks in the sentinel logcontext. """ - - redacted_pdus = [ - prune_event(pdu) - for pdu in pdus - ] - - deferreds = self.keyring.verify_json_objects_for_server([ - (p.origin, p.get_pdu_json()) - for p in redacted_pdus - ]) + deferreds = _check_sigs_on_pdus(self.keyring, pdus) ctx = logcontext.LoggingContext.current_context() - def callback(_, pdu, redacted): + def callback(_, pdu): with logcontext.PreserveLoggingContext(ctx): if not check_event_content_hash(pdu): - logger.warn( - "Event content has been tampered, redacting %s: %s", - pdu.event_id, pdu.get_pdu_json() - ) - return redacted + # let's try to distinguish between failures because the event was + # redacted (which are somewhat expected) vs actual ball-tampering + # incidents. + # + # This is just a heuristic, so we just assume that if the keys are + # about the same between the redacted and received events, then the + # received event was probably a redacted copy (but we then use our + # *actual* redacted copy to be on the safe side.) + redacted_event = prune_event(pdu) + if ( + set(redacted_event.keys()) == set(pdu.keys()) and + set(six.iterkeys(redacted_event.content)) + == set(six.iterkeys(pdu.content)) + ): + logger.info( + "Event %s seems to have been redacted; using our redacted " + "copy", + pdu.event_id, + ) + else: + logger.warning( + "Event %s content has been tampered, redacting", + pdu.event_id, pdu.get_pdu_json(), + ) + return redacted_event if self.spam_checker.check_event_for_spam(pdu): logger.warn( "Event contains spam, redacting %s: %s", pdu.event_id, pdu.get_pdu_json() ) - return redacted + return prune_event(pdu) return pdu @@ -168,21 +182,121 @@ class FederationBase(object): failure.trap(SynapseError) with logcontext.PreserveLoggingContext(ctx): logger.warn( - "Signature check failed for %s", - pdu.event_id, + "Signature check failed for %s: %s", + pdu.event_id, failure.getErrorMessage(), ) return failure - for deferred, pdu, redacted in zip(deferreds, pdus, redacted_pdus): + for deferred, pdu in zip(deferreds, pdus): deferred.addCallbacks( callback, errback, - callbackArgs=[pdu, redacted], + callbackArgs=[pdu], errbackArgs=[pdu], ) return deferreds +class PduToCheckSig(namedtuple("PduToCheckSig", [ + "pdu", "redacted_pdu_json", "event_id_domain", "sender_domain", "deferreds", +])): + pass + + +def _check_sigs_on_pdus(keyring, pdus): + """Check that the given events are correctly signed + + Args: + keyring (synapse.crypto.Keyring): keyring object to do the checks + pdus (Collection[EventBase]): the events to be checked + + Returns: + List[Deferred]: a Deferred for each event in pdus, which will either succeed if + the signatures are valid, or fail (with a SynapseError) if not. + """ + + # (currently this is written assuming the v1 room structure; we'll probably want a + # separate function for checking v2 rooms) + + # we want to check that the event is signed by: + # + # (a) the server which created the event_id + # + # (b) the sender's server. + # + # - except in the case of invites created from a 3pid invite, which are exempt + # from this check, because the sender has to match that of the original 3pid + # invite, but the event may come from a different HS, for reasons that I don't + # entirely grok (why do the senders have to match? and if they do, why doesn't the + # joining server ask the inviting server to do the switcheroo with + # exchange_third_party_invite?). + # + # That's pretty awful, since redacting such an invite will render it invalid + # (because it will then look like a regular invite without a valid signature), + # and signatures are *supposed* to be valid whether or not an event has been + # redacted. But this isn't the worst of the ways that 3pid invites are broken. + # + # let's start by getting the domain for each pdu, and flattening the event back + # to JSON. + pdus_to_check = [ + PduToCheckSig( + pdu=p, + redacted_pdu_json=prune_event(p).get_pdu_json(), + event_id_domain=get_domain_from_id(p.event_id), + sender_domain=get_domain_from_id(p.sender), + deferreds=[], + ) + for p in pdus + ] + + # first make sure that the event is signed by the event_id's domain + deferreds = keyring.verify_json_objects_for_server([ + (p.event_id_domain, p.redacted_pdu_json) + for p in pdus_to_check + ]) + + for p, d in zip(pdus_to_check, deferreds): + p.deferreds.append(d) + + # now let's look for events where the sender's domain is different to the + # event id's domain (normally only the case for joins/leaves), and add additional + # checks. + pdus_to_check_sender = [ + p for p in pdus_to_check + if p.sender_domain != p.event_id_domain and not _is_invite_via_3pid(p.pdu) + ] + + more_deferreds = keyring.verify_json_objects_for_server([ + (p.sender_domain, p.redacted_pdu_json) + for p in pdus_to_check_sender + ]) + + for p, d in zip(pdus_to_check_sender, more_deferreds): + p.deferreds.append(d) + + # replace lists of deferreds with single Deferreds + return [_flatten_deferred_list(p.deferreds) for p in pdus_to_check] + + +def _flatten_deferred_list(deferreds): + """Given a list of one or more deferreds, either return the single deferred, or + combine into a DeferredList. + """ + if len(deferreds) > 1: + return DeferredList(deferreds, fireOnOneErrback=True, consumeErrors=True) + else: + assert len(deferreds) == 1 + return deferreds[0] + + +def _is_invite_via_3pid(event): + return ( + event.type == EventTypes.Member + and event.membership == Membership.INVITE + and "third_party_invite" in event.content + ) + + def event_from_pdu_json(pdu_json, outlier=False): """Construct a FrozenEvent from an event json received over federation diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index c9f3c2d352..fe67b2ff42 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -271,10 +271,10 @@ class FederationClient(FederationBase): event_id, destination, e, ) except NotRetryingDestination as e: - logger.info(e.message) + logger.info(str(e)) continue except FederationDeniedError as e: - logger.info(e.message) + logger.info(str(e)) continue except Exception as e: pdu_attempts[destination] = now @@ -510,7 +510,7 @@ class FederationClient(FederationBase): else: logger.warn( "Failed to %s via %s: %i %s", - description, destination, e.code, e.message, + description, destination, e.code, e.args[0], ) except Exception: logger.warn( @@ -875,7 +875,7 @@ class FederationClient(FederationBase): except Exception as e: logger.exception( "Failed to send_third_party_invite via %s: %s", - destination, e.message + destination, str(e) ) raise RuntimeError("Failed to send to any server.") diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 3e0cd294a1..dbee404ea7 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -99,7 +99,7 @@ class FederationServer(FederationBase): @defer.inlineCallbacks @log_function - def on_incoming_transaction(self, transaction_data): + def on_incoming_transaction(self, origin, transaction_data): # keep this as early as possible to make the calculated origin ts as # accurate as possible. request_time = self._clock.time_msec() @@ -108,34 +108,33 @@ class FederationServer(FederationBase): if not transaction.transaction_id: raise Exception("Transaction missing transaction_id") - if not transaction.origin: - raise Exception("Transaction missing origin") logger.debug("[%s] Got transaction", transaction.transaction_id) # use a linearizer to ensure that we don't process the same transaction # multiple times in parallel. with (yield self._transaction_linearizer.queue( - (transaction.origin, transaction.transaction_id), + (origin, transaction.transaction_id), )): result = yield self._handle_incoming_transaction( - transaction, request_time, + origin, transaction, request_time, ) defer.returnValue(result) @defer.inlineCallbacks - def _handle_incoming_transaction(self, transaction, request_time): + def _handle_incoming_transaction(self, origin, transaction, request_time): """ Process an incoming transaction and return the HTTP response Args: + origin (unicode): the server making the request transaction (Transaction): incoming transaction request_time (int): timestamp that the HTTP request arrived at Returns: Deferred[(int, object)]: http response code and body """ - response = yield self.transaction_actions.have_responded(transaction) + response = yield self.transaction_actions.have_responded(origin, transaction) if response: logger.debug( @@ -149,7 +148,7 @@ class FederationServer(FederationBase): received_pdus_counter.inc(len(transaction.pdus)) - origin_host, _ = parse_server_name(transaction.origin) + origin_host, _ = parse_server_name(origin) pdus_by_room = {} @@ -190,7 +189,7 @@ class FederationServer(FederationBase): event_id = pdu.event_id try: yield self._handle_received_pdu( - transaction.origin, pdu + origin, pdu ) pdu_results[event_id] = {} except FederationError as e: @@ -212,7 +211,7 @@ class FederationServer(FederationBase): if hasattr(transaction, "edus"): for edu in (Edu(**x) for x in transaction.edus): yield self.received_edu( - transaction.origin, + origin, edu.edu_type, edu.content ) @@ -224,6 +223,7 @@ class FederationServer(FederationBase): logger.debug("Returning: %s", str(response)) yield self.transaction_actions.set_response( + origin, transaction, 200, response ) @@ -838,9 +838,9 @@ class ReplicationFederationHandlerRegistry(FederationHandlerRegistry): ) return self._send_edu( - edu_type=edu_type, - origin=origin, - content=content, + edu_type=edu_type, + origin=origin, + content=content, ) def on_query(self, query_type, args): @@ -851,6 +851,6 @@ class ReplicationFederationHandlerRegistry(FederationHandlerRegistry): return handler(args) return self._get_query_client( - query_type=query_type, - args=args, + query_type=query_type, + args=args, ) diff --git a/synapse/federation/persistence.py b/synapse/federation/persistence.py index 9146215c21..74ffd13b4f 100644 --- a/synapse/federation/persistence.py +++ b/synapse/federation/persistence.py @@ -36,7 +36,7 @@ class TransactionActions(object): self.store = datastore @log_function - def have_responded(self, transaction): + def have_responded(self, origin, transaction): """ Have we already responded to a transaction with the same id and origin? @@ -50,11 +50,11 @@ class TransactionActions(object): "transaction_id") return self.store.get_received_txn_response( - transaction.transaction_id, transaction.origin + transaction.transaction_id, origin ) @log_function - def set_response(self, transaction, code, response): + def set_response(self, origin, transaction, code, response): """ Persist how we responded to a transaction. Returns: @@ -66,7 +66,7 @@ class TransactionActions(object): return self.store.set_received_txn_response( transaction.transaction_id, - transaction.origin, + origin, code, response, ) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 94d7423d01..8cbf8c4f7f 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -463,7 +463,19 @@ class TransactionQueue(object): # pending_transactions flag. pending_pdus = self.pending_pdus_by_dest.pop(destination, []) + + # We can only include at most 50 PDUs per transactions + pending_pdus, leftover_pdus = pending_pdus[:50], pending_pdus[50:] + if leftover_pdus: + self.pending_pdus_by_dest[destination] = leftover_pdus + pending_edus = self.pending_edus_by_dest.pop(destination, []) + + # We can only include at most 100 EDUs per transactions + pending_edus, leftover_edus = pending_edus[:100], pending_edus[100:] + if leftover_edus: + self.pending_edus_by_dest[destination] = leftover_edus + pending_presence = self.pending_presence_by_dest.pop(destination, {}) pending_edus.extend( diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 1054441ca5..2ab973d6c8 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -15,7 +15,8 @@ # limitations under the License. import logging -import urllib + +from six.moves import urllib from twisted.internet import defer @@ -951,4 +952,4 @@ def _create_path(prefix, path, *args): Returns: str """ - return prefix + path % tuple(urllib.quote(arg, "") for arg in args) + return prefix + path % tuple(urllib.parse.quote(arg, "") for arg in args) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 7a993fd1cf..2f874b4838 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -90,8 +90,8 @@ class Authenticator(object): @defer.inlineCallbacks def authenticate_request(self, request, content): json_request = { - "method": request.method, - "uri": request.uri, + "method": request.method.decode('ascii'), + "uri": request.uri.decode('ascii'), "destination": self.server_name, "signatures": {}, } @@ -252,7 +252,7 @@ class BaseFederationServlet(object): by the callback method. None if the request has already been handled. """ content = None - if request.method in ["PUT", "POST"]: + if request.method in [b"PUT", b"POST"]: # TODO: Handle other method types? other content types? content = parse_json_object_from_request(request) @@ -353,7 +353,7 @@ class FederationSendServlet(BaseFederationServlet): try: code, response = yield self.handler.on_incoming_transaction( - transaction_data + origin, transaction_data, ) except Exception: logger.exception("on_incoming_transaction failed") @@ -386,7 +386,7 @@ class FederationStateServlet(BaseFederationServlet): return self.handler.on_context_state_request( origin, context, - query.get("event_id", [None])[0], + parse_string_from_args(query, "event_id", None), ) @@ -397,7 +397,7 @@ class FederationStateIdsServlet(BaseFederationServlet): return self.handler.on_state_ids_request( origin, room_id, - query.get("event_id", [None])[0], + parse_string_from_args(query, "event_id", None), ) @@ -405,14 +405,12 @@ class FederationBackfillServlet(BaseFederationServlet): PATH = "/backfill/(?P<context>[^/]*)/" def on_GET(self, origin, content, query, context): - versions = query["v"] - limits = query["limit"] + versions = [x.decode('ascii') for x in query[b"v"]] + limit = parse_integer_from_args(query, "limit", None) - if not limits: + if not limit: return defer.succeed((400, {"error": "Did not include limit param"})) - limit = int(limits[-1]) - return self.handler.on_backfill_request(origin, context, versions, limit) @@ -423,7 +421,7 @@ class FederationQueryServlet(BaseFederationServlet): def on_GET(self, origin, content, query, query_type): return self.handler.on_query_request( query_type, - {k: v[0].decode("utf-8") for k, v in query.items()} + {k.decode('utf8'): v[0].decode("utf-8") for k, v in query.items()} ) @@ -630,14 +628,14 @@ class OpenIdUserInfo(BaseFederationServlet): @defer.inlineCallbacks def on_GET(self, origin, content, query): - token = query.get("access_token", [None])[0] + token = query.get(b"access_token", [None])[0] if token is None: defer.returnValue((401, { "errcode": "M_MISSING_TOKEN", "error": "Access Token required" })) return - user_id = yield self.handler.on_openid_userinfo(token) + user_id = yield self.handler.on_openid_userinfo(token.decode('ascii')) if user_id is None: defer.returnValue((401, { diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 4a81bd2ba9..2a5eab124f 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -895,22 +895,24 @@ class AuthHandler(BaseHandler): Args: password (unicode): Password to hash. - stored_hash (unicode): Expected hash value. + stored_hash (bytes): Expected hash value. Returns: Deferred(bool): Whether self.hash(password) == stored_hash. """ - def _do_validate_hash(): # Normalise the Unicode in the password pw = unicodedata.normalize("NFKC", password) return bcrypt.checkpw( pw.encode('utf8') + self.hs.config.password_pepper.encode("utf8"), - stored_hash.encode('utf8') + stored_hash ) if stored_hash: + if not isinstance(stored_hash, bytes): + stored_hash = stored_hash.encode('ascii') + return make_deferred_yieldable( threads.deferToThreadPool( self.hs.get_reactor(), diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index ef866da1b6..18741c5fac 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -20,7 +20,14 @@ import string from twisted.internet import defer from synapse.api.constants import EventTypes -from synapse.api.errors import AuthError, CodeMessageException, Codes, SynapseError +from synapse.api.errors import ( + AuthError, + CodeMessageException, + Codes, + NotFoundError, + StoreError, + SynapseError, +) from synapse.types import RoomAlias, UserID, get_domain_from_id from ._base import BaseHandler @@ -109,7 +116,13 @@ class DirectoryHandler(BaseHandler): def delete_association(self, requester, user_id, room_alias): # association deletion for human users - can_delete = yield self._user_can_delete_alias(room_alias, user_id) + try: + can_delete = yield self._user_can_delete_alias(room_alias, user_id) + except StoreError as e: + if e.code == 404: + raise NotFoundError("Unknown room alias") + raise + if not can_delete: raise AuthError( 403, "You don't have permission to delete the alias.", @@ -320,7 +333,7 @@ class DirectoryHandler(BaseHandler): def _user_can_delete_alias(self, alias, user_id): creator = yield self.store.get_room_alias_creator(alias.to_string()) - if creator and creator == user_id: + if creator is not None and creator == user_id: defer.returnValue(True) is_admin = yield self.auth.is_server_admin(UserID.from_string(user_id)) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 5816bf8b4f..578e9250fb 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -330,7 +330,8 @@ class E2eKeysHandler(object): (algorithm, key_id, ex_json, key) ) else: - new_keys.append((algorithm, key_id, encode_canonical_json(key))) + new_keys.append(( + algorithm, key_id, encode_canonical_json(key).decode('ascii'))) yield self.store.add_e2e_one_time_keys( user_id, device_id, time_now, new_keys @@ -358,7 +359,7 @@ def _exception_to_failure(e): # Note that some Exceptions (notably twisted's ResponseFailed etc) don't # give a string for e.message, which json then fails to serialize. return { - "status": 503, "message": str(e.message), + "status": 503, "message": str(e), } diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 3fa7a98445..f10b46414b 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -360,6 +360,35 @@ class FederationHandler(BaseHandler): # apparently. # # see https://github.com/matrix-org/synapse/pull/1744 + # + # ---- + # + # Update richvdh 2018/09/18: There are a number of problems with timing this + # request out agressively on the client side: + # + # - it plays badly with the server-side rate-limiter, which starts tarpitting you + # if you send too many requests at once, so you end up with the server carefully + # working through the backlog of your requests, which you have already timed + # out. + # + # - for this request in particular, we now (as of + # https://github.com/matrix-org/synapse/pull/3456) reject any PDUs where the + # server can't produce a plausible-looking set of prev_events - so we becone + # much more likely to reject the event. + # + # - contrary to what it says above, we do *not* fall back to fetching fresh state + # for the room if get_missing_events times out. Rather, we give up processing + # the PDU whose prevs we are missing, which then makes it much more likely that + # we'll end up back here for the *next* PDU in the list, which exacerbates the + # problem. + # + # - the agressive 10s timeout was introduced to deal with incoming federation + # requests taking 8 hours to process. It's not entirely clear why that was going + # on; certainly there were other issues causing traffic storms which are now + # resolved, and I think in any case we may be more sensible about our locking + # now. We're *certainly* more sensible about our logging. + # + # All that said: Let's try increasing the timout to 60s and see what happens. missing_events = yield self.federation_client.get_missing_events( origin, @@ -368,7 +397,7 @@ class FederationHandler(BaseHandler): latest_events=[pdu], limit=10, min_depth=min_depth, - timeout=10000, + timeout=60000, ) logger.info( @@ -594,7 +623,7 @@ class FederationHandler(BaseHandler): required_auth = set( a_id - for event in events + state_events.values() + auth_events.values() + for event in events + list(state_events.values()) + list(auth_events.values()) for a_id, _ in event.auth_events ) auth_events.update({ @@ -802,7 +831,7 @@ class FederationHandler(BaseHandler): ) continue except NotRetryingDestination as e: - logger.info(e.message) + logger.info(str(e)) continue except FederationDeniedError as e: logger.info(e) @@ -1358,7 +1387,7 @@ class FederationHandler(BaseHandler): ) if state_groups: - _, state = state_groups.items().pop() + _, state = list(state_groups.items()).pop() results = state if event.is_state(): diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 5170d093e3..a155b6e938 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -269,14 +269,7 @@ class PaginationHandler(object): if state_ids: state = yield self.store.get_events(list(state_ids.values())) - - if state: - state = yield filter_events_for_client( - self.store, - user_id, - state.values(), - is_peeking=(member_event_id is None), - ) + state = state.values() time_now = self.clock.time_msec() diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 1e53f2c635..da914c46ff 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -534,4 +534,5 @@ class RegistrationHandler(BaseHandler): room_id=room_id, remote_room_hosts=remote_room_hosts, action="join", + ratelimit=False, ) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 37e41afd61..38e1737ec9 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -162,7 +162,7 @@ class RoomListHandler(BaseHandler): # Filter out rooms that we don't want to return rooms_to_scan = [ r for r in sorted_rooms - if r not in newly_unpublished and rooms_to_num_joined[room_id] > 0 + if r not in newly_unpublished and rooms_to_num_joined[r] > 0 ] total_room_count = len(rooms_to_scan) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index f643619047..07fd3e82fc 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -583,6 +583,11 @@ class RoomMemberHandler(object): room_id = mapping["room_id"] servers = mapping["servers"] + # put the server which owns the alias at the front of the server list. + if room_alias.domain in servers: + servers.remove(room_alias.domain) + servers.insert(0, room_alias.domain) + defer.returnValue((RoomID.from_string(room_id), servers)) @defer.inlineCallbacks diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index c464adbd0b..0c1d52fd11 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -54,7 +54,7 @@ class SearchHandler(BaseHandler): batch_token = None if batch: try: - b = decode_base64(batch) + b = decode_base64(batch).decode('ascii') batch_group, batch_group_key, batch_token = b.split("\n") assert batch_group is not None @@ -258,18 +258,18 @@ class SearchHandler(BaseHandler): # it returns more from the same group (if applicable) rather # than reverting to searching all results again. if batch_group and batch_group_key: - global_next_batch = encode_base64("%s\n%s\n%s" % ( + global_next_batch = encode_base64(("%s\n%s\n%s" % ( batch_group, batch_group_key, pagination_token - )) + )).encode('ascii')) else: - global_next_batch = encode_base64("%s\n%s\n%s" % ( + global_next_batch = encode_base64(("%s\n%s\n%s" % ( "all", "", pagination_token - )) + )).encode('ascii')) for room_id, group in room_groups.items(): - group["next_batch"] = encode_base64("%s\n%s\n%s" % ( + group["next_batch"] = encode_base64(("%s\n%s\n%s" % ( "room_id", room_id, pagination_token - )) + )).encode('ascii')) allowed_events.extend(room_events) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index ef20c2296c..9bca4e7067 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -24,6 +24,7 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, Membership from synapse.push.clientformat import format_push_rules_for_user +from synapse.storage.roommember import MemberSummary from synapse.types import RoomStreamToken from synapse.util.async_helpers import concurrently_execute from synapse.util.caches.expiringcache import ExpiringCache @@ -525,6 +526,8 @@ class SyncHandler(object): A deferred dict describing the room summary """ + # FIXME: we could/should get this from room_stats when matthew/stats lands + # FIXME: this promulgates https://github.com/matrix-org/synapse/issues/3305 last_events, _ = yield self.store.get_recent_event_ids_for_room( room_id, end_token=now_token.room_key, limit=1, @@ -537,44 +540,67 @@ class SyncHandler(object): last_event = last_events[-1] state_ids = yield self.store.get_state_ids_for_event( last_event.event_id, [ - (EventTypes.Member, None), (EventTypes.Name, ''), (EventTypes.CanonicalAlias, ''), ] ) - member_ids = { - state_key: event_id - for (t, state_key), event_id in state_ids.iteritems() - if t == EventTypes.Member - } + # this is heavily cached, thus: fast. + details = yield self.store.get_room_summary(room_id) + name_id = state_ids.get((EventTypes.Name, '')) canonical_alias_id = state_ids.get((EventTypes.CanonicalAlias, '')) summary = {} - - # FIXME: it feels very heavy to load up every single membership event - # just to calculate the counts. - member_events = yield self.store.get_events(member_ids.values()) - - joined_user_ids = [] - invited_user_ids = [] - - for ev in member_events.values(): - if ev.content.get("membership") == Membership.JOIN: - joined_user_ids.append(ev.state_key) - elif ev.content.get("membership") == Membership.INVITE: - invited_user_ids.append(ev.state_key) + empty_ms = MemberSummary([], 0) # TODO: only send these when they change. - summary["m.joined_member_count"] = len(joined_user_ids) - summary["m.invited_member_count"] = len(invited_user_ids) + summary["m.joined_member_count"] = ( + details.get(Membership.JOIN, empty_ms).count + ) + summary["m.invited_member_count"] = ( + details.get(Membership.INVITE, empty_ms).count + ) - if name_id or canonical_alias_id: - defer.returnValue(summary) + # if the room has a name or canonical_alias set, we can skip + # calculating heroes. we assume that if the event has contents, it'll + # be a valid name or canonical_alias - i.e. we're checking that they + # haven't been "deleted" by blatting {} over the top. + if name_id: + name = yield self.store.get_event(name_id, allow_none=False) + if name and name.content: + defer.returnValue(summary) + + if canonical_alias_id: + canonical_alias = yield self.store.get_event( + canonical_alias_id, allow_none=False, + ) + if canonical_alias and canonical_alias.content: + defer.returnValue(summary) - # FIXME: order by stream ordering, not alphabetic + joined_user_ids = [ + r[0] for r in details.get(Membership.JOIN, empty_ms).members + ] + invited_user_ids = [ + r[0] for r in details.get(Membership.INVITE, empty_ms).members + ] + gone_user_ids = ( + [r[0] for r in details.get(Membership.LEAVE, empty_ms).members] + + [r[0] for r in details.get(Membership.BAN, empty_ms).members] + ) + # FIXME: only build up a member_ids list for our heroes + member_ids = {} + for membership in ( + Membership.JOIN, + Membership.INVITE, + Membership.LEAVE, + Membership.BAN + ): + for user_id, event_id in details.get(membership, empty_ms).members: + member_ids[user_id] = event_id + + # FIXME: order by stream ordering rather than as returned by SQL me = sync_config.user.to_string() if (joined_user_ids or invited_user_ids): summary['m.heroes'] = sorted( @@ -586,7 +612,11 @@ class SyncHandler(object): )[0:5] else: summary['m.heroes'] = sorted( - [user_id for user_id in member_ids.keys() if user_id != me] + [ + user_id + for user_id in gone_user_ids + if user_id != me + ] )[0:5] if not sync_config.filter_collection.lazy_load_members(): @@ -719,6 +749,26 @@ class SyncHandler(object): lazy_load_members=lazy_load_members, ) elif batch.limited: + state_at_timeline_start = yield self.store.get_state_ids_for_event( + batch.events[0].event_id, types=types, + filtered_types=filtered_types, + ) + + # for now, we disable LL for gappy syncs - see + # https://github.com/vector-im/riot-web/issues/7211#issuecomment-419976346 + # N.B. this slows down incr syncs as we are now processing way + # more state in the server than if we were LLing. + # + # We still have to filter timeline_start to LL entries (above) in order + # for _calculate_state's LL logic to work, as we have to include LL + # members for timeline senders in case they weren't loaded in the initial + # sync. We do this by (counterintuitively) by filtering timeline_start + # members to just be ones which were timeline senders, which then ensures + # all of the rest get included in the state block (if we need to know + # about them). + types = None + filtered_types = None + state_at_previous_sync = yield self.get_state_at( room_id, stream_position=since_token, types=types, filtered_types=filtered_types, @@ -729,24 +779,21 @@ class SyncHandler(object): filtered_types=filtered_types, ) - state_at_timeline_start = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types, - filtered_types=filtered_types, - ) - state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_at_timeline_start, previous=state_at_previous_sync, current=current_state_ids, + # we have to include LL members in case LL initial sync missed them lazy_load_members=lazy_load_members, ) else: state_ids = {} if lazy_load_members: if types: - # We're returning an incremental sync, with no "gap" since - # the previous sync, so normally there would be no state to return + # We're returning an incremental sync, with no + # "gap" since the previous sync, so normally there would be + # no state to return. # But we're lazy-loading, so the client might need some more # member events to understand the events in this timeline. # So we fish out all the member events corresponding to the @@ -774,7 +821,7 @@ class SyncHandler(object): logger.debug("filtering state from %r...", state_ids) state_ids = { t: event_id - for t, event_id in state_ids.iteritems() + for t, event_id in iteritems(state_ids) if cache.get(t[1]) != event_id } logger.debug("...to %r", state_ids) @@ -1575,6 +1622,19 @@ class SyncHandler(object): newly_joined_room=newly_joined, ) + # When we join the room (or the client requests full_state), we should + # send down any existing tags. Usually the user won't have tags in a + # newly joined room, unless either a) they've joined before or b) the + # tag was added by synapse e.g. for server notice rooms. + if full_state: + user_id = sync_result_builder.sync_config.user.to_string() + tags = yield self.store.get_tags_for_room(user_id, room_id) + + # If there aren't any tags, don't send the empty tags list down + # sync + if not tags: + tags = None + account_data_events = [] if tags is not None: account_data_events.append({ @@ -1603,10 +1663,24 @@ class SyncHandler(object): ) summary = {} + + # we include a summary in room responses when we're lazy loading + # members (as the client otherwise doesn't have enough info to form + # the name itself). if ( sync_config.filter_collection.lazy_load_members() and ( + # we recalulate the summary: + # if there are membership changes in the timeline, or + # if membership has changed during a gappy sync, or + # if this is an initial sync. any(ev.type == EventTypes.Member for ev in batch.events) or + ( + # XXX: this may include false positives in the form of LL + # members which have snuck into state + batch.limited and + any(t == EventTypes.Member for (t, k) in state) + ) or since_token is None ) ): @@ -1636,6 +1710,16 @@ class SyncHandler(object): unread_notifications["highlight_count"] = notifs["highlight_count"] sync_result_builder.joined.append(room_sync) + + if batch.limited and since_token: + user_id = sync_result_builder.sync_config.user.to_string() + logger.info( + "Incremental gappy sync of %s for user %s with %d state events" % ( + room_id, + user_id, + len(state), + ) + ) elif room_builder.rtype == "archived": room_sync = ArchivedSyncResult( room_id=room_id, @@ -1729,17 +1813,17 @@ def _calculate_state( event_id_to_key = { e: key for key, e in itertools.chain( - timeline_contains.items(), - previous.items(), - timeline_start.items(), - current.items(), + iteritems(timeline_contains), + iteritems(previous), + iteritems(timeline_start), + iteritems(current), ) } - c_ids = set(e for e in current.values()) - ts_ids = set(e for e in timeline_start.values()) - p_ids = set(e for e in previous.values()) - tc_ids = set(e for e in timeline_contains.values()) + c_ids = set(e for e in itervalues(current)) + ts_ids = set(e for e in itervalues(timeline_start)) + p_ids = set(e for e in itervalues(previous)) + tc_ids = set(e for e in itervalues(timeline_contains)) # If we are lazyloading room members, we explicitly add the membership events # for the senders in the timeline into the state block returned by /sync, @@ -1753,7 +1837,7 @@ def _calculate_state( if lazy_load_members: p_ids.difference_update( - e for t, e in timeline_start.iteritems() + e for t, e in iteritems(timeline_start) if t[0] == EventTypes.Member ) diff --git a/synapse/http/__init__.py b/synapse/http/__init__.py index 58ef8d3ce4..a3f9e4f67c 100644 --- a/synapse/http/__init__.py +++ b/synapse/http/__init__.py @@ -38,12 +38,12 @@ def cancelled_to_request_timed_out_error(value, timeout): return value -ACCESS_TOKEN_RE = re.compile(br'(\?.*access(_|%5[Ff])token=)[^&]*(.*)$') +ACCESS_TOKEN_RE = re.compile(r'(\?.*access(_|%5[Ff])token=)[^&]*(.*)$') def redact_uri(uri): """Strips access tokens from the uri replaces with <redacted>""" return ACCESS_TOKEN_RE.sub( - br'\1<redacted>\3', + r'\1<redacted>\3', uri ) diff --git a/synapse/http/client.py b/synapse/http/client.py index ab4fbf59b2..3d05f83b8c 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -13,24 +13,25 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import logging -import urllib -from six import StringIO +from six import text_type +from six.moves import urllib +import treq from canonicaljson import encode_canonical_json, json from prometheus_client import Counter from OpenSSL import SSL from OpenSSL.SSL import VERIFY_NONE -from twisted.internet import defer, protocol, reactor, ssl, task +from twisted.internet import defer, protocol, reactor, ssl from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS from twisted.web._newclient import ResponseDone from twisted.web.client import ( Agent, BrowserLikeRedirectAgent, ContentDecoderAgent, - FileBodyProducer as TwistedFileBodyProducer, GzipDecoder, HTTPConnectionPool, PartialDownloadError, @@ -42,7 +43,7 @@ from twisted.web.http_headers import Headers from synapse.api.errors import Codes, HttpResponseException, SynapseError from synapse.http import cancelled_to_request_timed_out_error, redact_uri from synapse.http.endpoint import SpiderEndpoint -from synapse.util.async_helpers import add_timeout_to_deferred +from synapse.util.async_helpers import timeout_deferred from synapse.util.caches import CACHE_SIZE_FACTOR from synapse.util.logcontext import make_deferred_yieldable @@ -83,8 +84,10 @@ class SimpleHttpClient(object): if hs.config.user_agent_suffix: self.user_agent = "%s %s" % (self.user_agent, hs.config.user_agent_suffix,) + self.user_agent = self.user_agent.encode('ascii') + @defer.inlineCallbacks - def request(self, method, uri, *args, **kwargs): + def request(self, method, uri, data=b'', headers=None): # A small wrapper around self.agent.request() so we can easily attach # counters to it outgoing_requests_counter.labels(method).inc() @@ -93,10 +96,10 @@ class SimpleHttpClient(object): logger.info("Sending request %s %s", method, redact_uri(uri)) try: - request_deferred = self.agent.request( - method, uri, *args, **kwargs + request_deferred = treq.request( + method, uri, agent=self.agent, data=data, headers=headers ) - add_timeout_to_deferred( + request_deferred = timeout_deferred( request_deferred, 60, self.hs.get_reactor(), cancelled_to_request_timed_out_error, ) @@ -112,7 +115,7 @@ class SimpleHttpClient(object): incoming_responses_counter.labels(method, "ERR").inc() logger.info( "Error sending request to %s %s: %s %s", - method, redact_uri(uri), type(e).__name__, e.message + method, redact_uri(uri), type(e).__name__, e.args[0] ) raise @@ -137,7 +140,8 @@ class SimpleHttpClient(object): # TODO: Do we ever want to log message contents? logger.debug("post_urlencoded_get_json args: %s", args) - query_bytes = urllib.urlencode(encode_urlencode_args(args), True) + query_bytes = urllib.parse.urlencode( + encode_urlencode_args(args), True).encode("utf8") actual_headers = { b"Content-Type": [b"application/x-www-form-urlencoded"], @@ -148,15 +152,14 @@ class SimpleHttpClient(object): response = yield self.request( "POST", - uri.encode("ascii"), + uri, headers=Headers(actual_headers), - bodyProducer=FileBodyProducer(StringIO(query_bytes)) + data=query_bytes ) - body = yield make_deferred_yieldable(readBody(response)) - if 200 <= response.code < 300: - defer.returnValue(json.loads(body)) + body = yield make_deferred_yieldable(treq.json_content(response)) + defer.returnValue(body) else: raise HttpResponseException(response.code, response.phrase, body) @@ -191,9 +194,9 @@ class SimpleHttpClient(object): response = yield self.request( "POST", - uri.encode("ascii"), + uri, headers=Headers(actual_headers), - bodyProducer=FileBodyProducer(StringIO(json_str)) + data=json_str ) body = yield make_deferred_yieldable(readBody(response)) @@ -248,7 +251,7 @@ class SimpleHttpClient(object): ValueError: if the response was not JSON """ if len(args): - query_bytes = urllib.urlencode(args, True) + query_bytes = urllib.parse.urlencode(args, True) uri = "%s?%s" % (uri, query_bytes) json_str = encode_canonical_json(json_body) @@ -262,9 +265,9 @@ class SimpleHttpClient(object): response = yield self.request( "PUT", - uri.encode("ascii"), + uri, headers=Headers(actual_headers), - bodyProducer=FileBodyProducer(StringIO(json_str)) + data=json_str ) body = yield make_deferred_yieldable(readBody(response)) @@ -293,7 +296,7 @@ class SimpleHttpClient(object): HttpResponseException on a non-2xx HTTP response. """ if len(args): - query_bytes = urllib.urlencode(args, True) + query_bytes = urllib.parse.urlencode(args, True) uri = "%s?%s" % (uri, query_bytes) actual_headers = { @@ -304,7 +307,7 @@ class SimpleHttpClient(object): response = yield self.request( "GET", - uri.encode("ascii"), + uri, headers=Headers(actual_headers), ) @@ -339,13 +342,14 @@ class SimpleHttpClient(object): response = yield self.request( "GET", - url.encode("ascii"), + url, headers=Headers(actual_headers), ) resp_headers = dict(response.headers.getAllRawHeaders()) - if 'Content-Length' in resp_headers and resp_headers['Content-Length'] > max_size: + if (b'Content-Length' in resp_headers and + int(resp_headers[b'Content-Length']) > max_size): logger.warn("Requested URL is too large > %r bytes" % (self.max_size,)) raise SynapseError( 502, @@ -378,7 +382,12 @@ class SimpleHttpClient(object): ) defer.returnValue( - (length, resp_headers, response.request.absoluteURI, response.code), + ( + length, + resp_headers, + response.request.absoluteURI.decode('ascii'), + response.code, + ), ) @@ -434,12 +443,12 @@ class CaptchaServerHttpClient(SimpleHttpClient): @defer.inlineCallbacks def post_urlencoded_get_raw(self, url, args={}): - query_bytes = urllib.urlencode(encode_urlencode_args(args), True) + query_bytes = urllib.parse.urlencode(encode_urlencode_args(args), True) response = yield self.request( "POST", - url.encode("ascii"), - bodyProducer=FileBodyProducer(StringIO(query_bytes)), + url, + data=query_bytes, headers=Headers({ b"Content-Type": [b"application/x-www-form-urlencoded"], b"User-Agent": [self.user_agent], @@ -463,9 +472,9 @@ class SpiderEndpointFactory(object): def endpointForURI(self, uri): logger.info("Getting endpoint for %s", uri.toBytes()) - if uri.scheme == "http": + if uri.scheme == b"http": endpoint_factory = HostnameEndpoint - elif uri.scheme == "https": + elif uri.scheme == b"https": tlsCreator = self.policyForHTTPS.creatorForNetloc(uri.host, uri.port) def endpoint_factory(reactor, host, port, **kw): @@ -510,7 +519,7 @@ def encode_urlencode_args(args): def encode_urlencode_arg(arg): - if isinstance(arg, unicode): + if isinstance(arg, text_type): return arg.encode('utf-8') elif isinstance(arg, list): return [encode_urlencode_arg(i) for i in arg] @@ -542,26 +551,3 @@ class InsecureInterceptableContextFactory(ssl.ContextFactory): def creatorForNetloc(self, hostname, port): return self - - -class FileBodyProducer(TwistedFileBodyProducer): - """Workaround for https://twistedmatrix.com/trac/ticket/8473 - - We override the pauseProducing and resumeProducing methods in twisted's - FileBodyProducer so that they do not raise exceptions if the task has - already completed. - """ - - def pauseProducing(self): - try: - super(FileBodyProducer, self).pauseProducing() - except task.TaskDone: - # task has already completed - pass - - def resumeProducing(self): - try: - super(FileBodyProducer, self).resumeProducing() - except task.NotPaused: - # task was not paused (probably because it had already completed) - pass diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py index b0c9369519..91025037a3 100644 --- a/synapse/http/endpoint.py +++ b/synapse/http/endpoint.py @@ -108,7 +108,7 @@ def matrix_federation_endpoint(reactor, destination, tls_client_options_factory= Args: reactor: Twisted reactor. - destination (bytes): The name of the server to connect to. + destination (unicode): The name of the server to connect to. tls_client_options_factory (synapse.crypto.context_factory.ClientTLSOptionsFactory): Factory which generates TLS options for client connections. @@ -126,10 +126,17 @@ def matrix_federation_endpoint(reactor, destination, tls_client_options_factory= transport_endpoint = HostnameEndpoint default_port = 8008 else: + # the SNI string should be the same as the Host header, minus the port. + # as per https://github.com/matrix-org/synapse/issues/2525#issuecomment-336896777, + # the Host header and SNI should therefore be the server_name of the remote + # server. + tls_options = tls_client_options_factory.get_options(domain) + def transport_endpoint(reactor, host, port, timeout): return wrapClientTLS( - tls_client_options_factory.get_options(host), - HostnameEndpoint(reactor, host, port, timeout=timeout)) + tls_options, + HostnameEndpoint(reactor, host, port, timeout=timeout), + ) default_port = 8448 if port is None: diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index b34bb8e31a..14b12cd1c4 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -17,19 +17,22 @@ import cgi import logging import random import sys -import urllib +from io import BytesIO -from six import string_types -from six.moves.urllib import parse as urlparse +from six import PY3, string_types +from six.moves import urllib -from canonicaljson import encode_canonical_json, json +import attr +import treq +from canonicaljson import encode_canonical_json from prometheus_client import Counter from signedjson.sign import sign_json -from twisted.internet import defer, protocol, reactor +from twisted.internet import defer, protocol from twisted.internet.error import DNSLookupError +from twisted.internet.task import _EPSILON, Cooperator from twisted.web._newclient import ResponseDone -from twisted.web.client import Agent, HTTPConnectionPool, readBody +from twisted.web.client import Agent, FileBodyProducer, HTTPConnectionPool from twisted.web.http_headers import Headers import synapse.metrics @@ -40,14 +43,12 @@ from synapse.api.errors import ( HttpResponseException, SynapseError, ) -from synapse.http import cancelled_to_request_timed_out_error from synapse.http.endpoint import matrix_federation_endpoint -from synapse.util import logcontext -from synapse.util.async_helpers import add_timeout_to_deferred +from synapse.util.async_helpers import timeout_deferred from synapse.util.logcontext import make_deferred_yieldable +from synapse.util.metrics import Measure logger = logging.getLogger(__name__) -outbound_logger = logging.getLogger("synapse.http.outbound") outgoing_requests_counter = Counter("synapse_http_matrixfederationclient_requests", "", ["method"]) @@ -58,20 +59,119 @@ incoming_responses_counter = Counter("synapse_http_matrixfederationclient_respon MAX_LONG_RETRIES = 10 MAX_SHORT_RETRIES = 3 +if PY3: + MAXINT = sys.maxsize +else: + MAXINT = sys.maxint + class MatrixFederationEndpointFactory(object): def __init__(self, hs): + self.reactor = hs.get_reactor() self.tls_client_options_factory = hs.tls_client_options_factory def endpointForURI(self, uri): - destination = uri.netloc + destination = uri.netloc.decode('ascii') return matrix_federation_endpoint( - reactor, destination, timeout=10, + self.reactor, destination, timeout=10, tls_client_options_factory=self.tls_client_options_factory ) +_next_id = 1 + + +@attr.s +class MatrixFederationRequest(object): + method = attr.ib() + """HTTP method + :type: str + """ + + path = attr.ib() + """HTTP path + :type: str + """ + + destination = attr.ib() + """The remote server to send the HTTP request to. + :type: str""" + + json = attr.ib(default=None) + """JSON to send in the body. + :type: dict|None + """ + + json_callback = attr.ib(default=None) + """A callback to generate the JSON. + :type: func|None + """ + + query = attr.ib(default=None) + """Query arguments. + :type: dict|None + """ + + txn_id = attr.ib(default=None) + """Unique ID for this request (for logging) + :type: str|None + """ + + def __attrs_post_init__(self): + global _next_id + self.txn_id = "%s-O-%s" % (self.method, _next_id) + _next_id = (_next_id + 1) % (MAXINT - 1) + + def get_json(self): + if self.json_callback: + return self.json_callback() + return self.json + + +@defer.inlineCallbacks +def _handle_json_response(reactor, timeout_sec, request, response): + """ + Reads the JSON body of a response, with a timeout + + Args: + reactor (IReactor): twisted reactor, for the timeout + timeout_sec (float): number of seconds to wait for response to complete + request (MatrixFederationRequest): the request that triggered the response + response (IResponse): response to the request + + Returns: + dict: parsed JSON response + """ + try: + check_content_type_is_json(response.headers) + + d = treq.json_content(response) + d = timeout_deferred( + d, + timeout=timeout_sec, + reactor=reactor, + ) + + body = yield make_deferred_yieldable(d) + except Exception as e: + logger.warn( + "{%s} [%s] Error reading response: %s", + request.txn_id, + request.destination, + e, + ) + raise + logger.info( + "{%s} [%s] Completed: %d %s", + request.txn_id, + request.destination, + response.code, + response.phrase.decode('ascii', errors='replace'), + ) + defer.returnValue(body) + + class MatrixFederationHttpClient(object): """HTTP client used to talk to other homeservers over the federation protocol. Send client certificates and signs requests. @@ -85,7 +185,9 @@ class MatrixFederationHttpClient(object): self.hs = hs self.signing_key = hs.config.signing_key[0] self.server_name = hs.hostname + reactor = hs.get_reactor() pool = HTTPConnectionPool(reactor) + pool.retryAutomatically = False pool.maxPersistentPerHost = 5 pool.cachedConnectionTimeout = 2 * 60 self.agent = Agent.usingEndpointFactory( @@ -93,28 +195,36 @@ class MatrixFederationHttpClient(object): ) self.clock = hs.get_clock() self._store = hs.get_datastore() - self.version_string = hs.version_string - self._next_id = 1 + self.version_string = hs.version_string.encode('ascii') + self.default_timeout = 60 - def _create_url(self, destination, path_bytes, param_bytes, query_bytes): - return urlparse.urlunparse( - ("matrix", destination, path_bytes, param_bytes, query_bytes, "") - ) + def schedule(x): + reactor.callLater(_EPSILON, x) + + self._cooperator = Cooperator(scheduler=schedule) @defer.inlineCallbacks - def _request(self, destination, method, path, - body_callback, headers_dict={}, param_bytes=b"", - query_bytes=b"", retry_on_dns_fail=True, - timeout=None, long_retries=False, - ignore_backoff=False, - backoff_on_404=False): - """ Creates and sends a request to the given server + def _send_request( + self, + request, + retry_on_dns_fail=True, + timeout=None, + long_retries=False, + ignore_backoff=False, + backoff_on_404=False + ): + """ + Sends a request to the given server. + Args: - destination (str): The remote server to send the HTTP request to. - method (str): HTTP method - path (str): The HTTP path + request (MatrixFederationRequest): details of request to be sent + + timeout (int|None): number of milliseconds to wait for the response headers + (including connecting to the server). 60s by default. + ignore_backoff (bool): true to ignore the historical backoff data and try the request anyway. + backoff_on_404 (bool): Back off if we get a 404 Returns: @@ -132,38 +242,39 @@ class MatrixFederationHttpClient(object): (May also fail with plenty of other Exceptions for things like DNS failures, connection failures, SSL failures.) """ + if timeout: + _sec_timeout = timeout / 1000 + else: + _sec_timeout = self.default_timeout + if ( self.hs.config.federation_domain_whitelist is not None and - destination not in self.hs.config.federation_domain_whitelist + request.destination not in self.hs.config.federation_domain_whitelist ): - raise FederationDeniedError(destination) + raise FederationDeniedError(request.destination) limiter = yield synapse.util.retryutils.get_retry_limiter( - destination, + request.destination, self.clock, self._store, backoff_on_404=backoff_on_404, ignore_backoff=ignore_backoff, ) - destination = destination.encode("ascii") - path_bytes = path.encode("ascii") - with limiter: - headers_dict[b"User-Agent"] = [self.version_string] - headers_dict[b"Host"] = [destination] - - url_bytes = self._create_url( - destination, path_bytes, param_bytes, query_bytes - ) - - txn_id = "%s-O-%s" % (method, self._next_id) - self._next_id = (self._next_id + 1) % (sys.maxint - 1) + method = request.method + destination = request.destination + path_bytes = request.path.encode("ascii") + if request.query: + query_bytes = encode_query_args(request.query) + else: + query_bytes = b"" - outbound_logger.info( - "{%s} [%s] Sending request: %s %s", - txn_id, destination, method, url_bytes - ) + headers_dict = { + "User-Agent": [self.version_string], + "Host": [request.destination], + } + with limiter: # XXX: Would be much nicer to retry only at the transaction-layer # (once we have reliable transactions in place) if long_retries: @@ -171,88 +282,119 @@ class MatrixFederationHttpClient(object): else: retries_left = MAX_SHORT_RETRIES - http_url_bytes = urlparse.urlunparse( - ("", "", path_bytes, param_bytes, query_bytes, "") - ) - - log_result = None - try: - while True: - producer = None - if body_callback: - producer = body_callback(method, http_url_bytes, headers_dict) - - try: - request_deferred = self.agent.request( - method, - url_bytes, - Headers(headers_dict), - producer + url = urllib.parse.urlunparse(( + b"matrix", destination.encode("ascii"), + path_bytes, None, query_bytes, b"", + )).decode('ascii') + + http_url = urllib.parse.urlunparse(( + b"", b"", + path_bytes, None, query_bytes, b"", + )).decode('ascii') + + while True: + try: + json = request.get_json() + if json: + data = encode_canonical_json(json) + headers_dict["Content-Type"] = ["application/json"] + self.sign_request( + destination, method, http_url, headers_dict, json ) - add_timeout_to_deferred( - request_deferred, - timeout / 1000. if timeout else 60, - self.hs.get_reactor(), - cancelled_to_request_timed_out_error, + else: + data = None + self.sign_request(destination, method, http_url, headers_dict) + + logger.info( + "{%s} [%s] Sending request: %s %s", + request.txn_id, destination, method, url + ) + + if data: + producer = FileBodyProducer( + BytesIO(data), + cooperator=self._cooperator ) + else: + producer = None + + request_deferred = treq.request( + method, + url, + headers=Headers(headers_dict), + data=producer, + agent=self.agent, + reactor=self.hs.get_reactor(), + unbuffered=True + ) + + request_deferred = timeout_deferred( + request_deferred, + timeout=_sec_timeout, + reactor=self.hs.get_reactor(), + ) + + with Measure(self.clock, "outbound_request"): response = yield make_deferred_yieldable( request_deferred, ) - log_result = "%d %s" % (response.code, response.phrase,) - break - except Exception as e: - if not retry_on_dns_fail and isinstance(e, DNSLookupError): - logger.warn( - "DNS Lookup failed to %s with %s", - destination, - e - ) - log_result = "DNS Lookup failed to %s with %s" % ( - destination, e - ) - raise - - logger.warn( - "{%s} Sending request failed to %s: %s %s: %s", - txn_id, + break + except Exception as e: + logger.warn( + "{%s} [%s] Request failed: %s %s: %s", + request.txn_id, + destination, + method, + url, + _flatten_response_never_received(e), + ) + + if not retry_on_dns_fail and isinstance(e, DNSLookupError): + raise + + if retries_left and not timeout: + if long_retries: + delay = 4 ** (MAX_LONG_RETRIES + 1 - retries_left) + delay = min(delay, 60) + delay *= random.uniform(0.8, 1.4) + else: + delay = 0.5 * 2 ** (MAX_SHORT_RETRIES - retries_left) + delay = min(delay, 2) + delay *= random.uniform(0.8, 1.4) + + logger.debug( + "{%s} [%s] Waiting %ss before re-sending...", + request.txn_id, destination, - method, - url_bytes, - _flatten_response_never_received(e), + delay, ) - log_result = _flatten_response_never_received(e) - - if retries_left and not timeout: - if long_retries: - delay = 4 ** (MAX_LONG_RETRIES + 1 - retries_left) - delay = min(delay, 60) - delay *= random.uniform(0.8, 1.4) - else: - delay = 0.5 * 2 ** (MAX_SHORT_RETRIES - retries_left) - delay = min(delay, 2) - delay *= random.uniform(0.8, 1.4) - - yield self.clock.sleep(delay) - retries_left -= 1 - else: - raise - finally: - outbound_logger.info( - "{%s} [%s] Result: %s", - txn_id, - destination, - log_result, - ) + yield self.clock.sleep(delay) + retries_left -= 1 + else: + raise + + logger.info( + "{%s} [%s] Got response headers: %d %s", + request.txn_id, + destination, + response.code, + response.phrase.decode('ascii', errors='replace'), + ) if 200 <= response.code < 300: pass else: # :'( # Update transactions table? - with logcontext.PreserveLoggingContext(): - body = yield readBody(response) + d = treq.content(response) + d = timeout_deferred( + d, + timeout=_sec_timeout, + reactor=self.hs.get_reactor(), + ) + body = yield make_deferred_yieldable(d) raise HttpResponseException( response.code, response.phrase, body ) @@ -297,11 +439,11 @@ class MatrixFederationHttpClient(object): auth_headers = [] for key, sig in request["signatures"][self.server_name].items(): - auth_headers.append(bytes( + auth_headers.append(( "X-Matrix origin=%s,key=\"%s\",sig=\"%s\"" % ( self.server_name, key, sig, - ) - )) + )).encode('ascii') + ) headers_dict[b"Authorization"] = auth_headers @@ -346,38 +488,27 @@ class MatrixFederationHttpClient(object): is not on our federation whitelist """ - if not json_data_callback: - def json_data_callback(): - return data + request = MatrixFederationRequest( + method="PUT", + destination=destination, + path=path, + query=args, + json_callback=json_data_callback, + json=data, + ) - def body_callback(method, url_bytes, headers_dict): - json_data = json_data_callback() - self.sign_request( - destination, method, url_bytes, headers_dict, json_data - ) - producer = _JsonProducer(json_data) - return producer - - response = yield self._request( - destination, - "PUT", - path, - body_callback=body_callback, - headers_dict={"Content-Type": ["application/json"]}, - query_bytes=encode_query_args(args), + response = yield self._send_request( + request, long_retries=long_retries, timeout=timeout, ignore_backoff=ignore_backoff, backoff_on_404=backoff_on_404, ) - if 200 <= response.code < 300: - # We need to update the transactions table to say it was sent? - check_content_type_is_json(response.headers) - - with logcontext.PreserveLoggingContext(): - body = yield readBody(response) - defer.returnValue(json.loads(body)) + body = yield _handle_json_response( + self.hs.get_reactor(), self.default_timeout, request, response, + ) + defer.returnValue(body) @defer.inlineCallbacks def post_json(self, destination, path, data={}, long_retries=False, @@ -411,32 +542,30 @@ class MatrixFederationHttpClient(object): is not on our federation whitelist """ - def body_callback(method, url_bytes, headers_dict): - self.sign_request( - destination, method, url_bytes, headers_dict, data - ) - return _JsonProducer(data) - - response = yield self._request( - destination, - "POST", - path, - query_bytes=encode_query_args(args), - body_callback=body_callback, - headers_dict={"Content-Type": ["application/json"]}, + request = MatrixFederationRequest( + method="POST", + destination=destination, + path=path, + query=args, + json=data, + ) + + response = yield self._send_request( + request, long_retries=long_retries, timeout=timeout, ignore_backoff=ignore_backoff, ) - if 200 <= response.code < 300: - # We need to update the transactions table to say it was sent? - check_content_type_is_json(response.headers) - - with logcontext.PreserveLoggingContext(): - body = yield readBody(response) + if timeout: + _sec_timeout = timeout / 1000 + else: + _sec_timeout = self.default_timeout - defer.returnValue(json.loads(body)) + body = yield _handle_json_response( + self.hs.get_reactor(), _sec_timeout, request, response, + ) + defer.returnValue(body) @defer.inlineCallbacks def get_json(self, destination, path, args=None, retry_on_dns_fail=True, @@ -471,29 +600,24 @@ class MatrixFederationHttpClient(object): logger.debug("Query bytes: %s Retry DNS: %s", args, retry_on_dns_fail) - def body_callback(method, url_bytes, headers_dict): - self.sign_request(destination, method, url_bytes, headers_dict) - return None + request = MatrixFederationRequest( + method="GET", + destination=destination, + path=path, + query=args, + ) - response = yield self._request( - destination, - "GET", - path, - query_bytes=encode_query_args(args), - body_callback=body_callback, + response = yield self._send_request( + request, retry_on_dns_fail=retry_on_dns_fail, timeout=timeout, ignore_backoff=ignore_backoff, ) - if 200 <= response.code < 300: - # We need to update the transactions table to say it was sent? - check_content_type_is_json(response.headers) - - with logcontext.PreserveLoggingContext(): - body = yield readBody(response) - - defer.returnValue(json.loads(body)) + body = yield _handle_json_response( + self.hs.get_reactor(), self.default_timeout, request, response, + ) + defer.returnValue(body) @defer.inlineCallbacks def delete_json(self, destination, path, long_retries=False, @@ -523,26 +647,24 @@ class MatrixFederationHttpClient(object): Fails with ``FederationDeniedError`` if this destination is not on our federation whitelist """ + request = MatrixFederationRequest( + method="DELETE", + destination=destination, + path=path, + query=args, + ) - response = yield self._request( - destination, - "DELETE", - path, - query_bytes=encode_query_args(args), - headers_dict={"Content-Type": ["application/json"]}, + response = yield self._send_request( + request, long_retries=long_retries, timeout=timeout, ignore_backoff=ignore_backoff, ) - if 200 <= response.code < 300: - # We need to update the transactions table to say it was sent? - check_content_type_is_json(response.headers) - - with logcontext.PreserveLoggingContext(): - body = yield readBody(response) - - defer.returnValue(json.loads(body)) + body = yield _handle_json_response( + self.hs.get_reactor(), self.default_timeout, request, response, + ) + defer.returnValue(body) @defer.inlineCallbacks def get_file(self, destination, path, output_stream, args={}, @@ -569,26 +691,15 @@ class MatrixFederationHttpClient(object): Fails with ``FederationDeniedError`` if this destination is not on our federation whitelist """ + request = MatrixFederationRequest( + method="GET", + destination=destination, + path=path, + query=args, + ) - encoded_args = {} - for k, vs in args.items(): - if isinstance(vs, string_types): - vs = [vs] - encoded_args[k] = [v.encode("UTF-8") for v in vs] - - query_bytes = urllib.urlencode(encoded_args, True) - logger.debug("Query bytes: %s Retry DNS: %s", query_bytes, retry_on_dns_fail) - - def body_callback(method, url_bytes, headers_dict): - self.sign_request(destination, method, url_bytes, headers_dict) - return None - - response = yield self._request( - destination, - "GET", - path, - query_bytes=query_bytes, - body_callback=body_callback, + response = yield self._send_request( + request, retry_on_dns_fail=retry_on_dns_fail, ignore_backoff=ignore_backoff, ) @@ -596,14 +707,25 @@ class MatrixFederationHttpClient(object): headers = dict(response.headers.getAllRawHeaders()) try: - with logcontext.PreserveLoggingContext(): - length = yield _readBodyToFile( - response, output_stream, max_size - ) - except Exception: - logger.exception("Failed to download body") + d = _readBodyToFile(response, output_stream, max_size) + d.addTimeout(self.default_timeout, self.hs.get_reactor()) + length = yield make_deferred_yieldable(d) + except Exception as e: + logger.warn( + "{%s} [%s] Error reading response: %s", + request.txn_id, + request.destination, + e, + ) raise - + logger.info( + "{%s} [%s] Completed: %d %s [%d bytes]", + request.txn_id, + request.destination, + response.code, + response.phrase.decode('ascii', errors='replace'), + length, + ) defer.returnValue((length, headers)) @@ -639,30 +761,6 @@ def _readBodyToFile(response, stream, max_size): return d -class _JsonProducer(object): - """ Used by the twisted http client to create the HTTP body from json - """ - def __init__(self, jsn): - self.reset(jsn) - - def reset(self, jsn): - self.body = encode_canonical_json(jsn) - self.length = len(self.body) - - def startProducing(self, consumer): - consumer.write(self.body) - return defer.succeed(None) - - def pauseProducing(self): - pass - - def stopProducing(self): - pass - - def resumeProducing(self): - pass - - def _flatten_response_never_received(e): if hasattr(e, "reasons"): reasons = ", ".join( @@ -693,7 +791,7 @@ def check_content_type_is_json(headers): "No Content-Type header" ) - c_type = c_type[0] # only the first header + c_type = c_type[0].decode('ascii') # only the first header val, options = cgi.parse_header(c_type) if val != "application/json": raise RuntimeError( @@ -711,6 +809,6 @@ def encode_query_args(args): vs = [vs] encoded_args[k] = [v.encode("UTF-8") for v in vs] - query_bytes = urllib.urlencode(encoded_args, True) + query_bytes = urllib.parse.urlencode(encoded_args, True) - return query_bytes + return query_bytes.encode('utf8') diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index 72c2654678..fedb4e6b18 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -162,7 +162,7 @@ class RequestMetrics(object): with _in_flight_requests_lock: _in_flight_requests.add(self) - def stop(self, time_sec, request): + def stop(self, time_sec, response_code, sent_bytes): with _in_flight_requests_lock: _in_flight_requests.discard(self) @@ -179,35 +179,35 @@ class RequestMetrics(object): ) return - response_code = str(request.code) + response_code = str(response_code) - outgoing_responses_counter.labels(request.method, response_code).inc() + outgoing_responses_counter.labels(self.method, response_code).inc() - response_count.labels(request.method, self.name, tag).inc() + response_count.labels(self.method, self.name, tag).inc() - response_timer.labels(request.method, self.name, tag, response_code).observe( + response_timer.labels(self.method, self.name, tag, response_code).observe( time_sec - self.start ) resource_usage = context.get_resource_usage() - response_ru_utime.labels(request.method, self.name, tag).inc( + response_ru_utime.labels(self.method, self.name, tag).inc( resource_usage.ru_utime, ) - response_ru_stime.labels(request.method, self.name, tag).inc( + response_ru_stime.labels(self.method, self.name, tag).inc( resource_usage.ru_stime, ) - response_db_txn_count.labels(request.method, self.name, tag).inc( + response_db_txn_count.labels(self.method, self.name, tag).inc( resource_usage.db_txn_count ) - response_db_txn_duration.labels(request.method, self.name, tag).inc( + response_db_txn_duration.labels(self.method, self.name, tag).inc( resource_usage.db_txn_duration_sec ) - response_db_sched_duration.labels(request.method, self.name, tag).inc( + response_db_sched_duration.labels(self.method, self.name, tag).inc( resource_usage.db_sched_duration_sec ) - response_size.labels(request.method, self.name, tag).inc(request.sentLength) + response_size.labels(self.method, self.name, tag).inc(sent_bytes) # We always call this at the end to ensure that we update the metrics # regardless of whether a call to /metrics while the request was in diff --git a/synapse/http/site.py b/synapse/http/site.py index 88ed3714f9..9579e8cd0d 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -82,10 +82,13 @@ class SynapseRequest(Request): ) def get_request_id(self): - return "%s-%i" % (self.method, self.request_seq) + return "%s-%i" % (self.method.decode('ascii'), self.request_seq) def get_redacted_uri(self): - return redact_uri(self.uri) + uri = self.uri + if isinstance(uri, bytes): + uri = self.uri.decode('ascii') + return redact_uri(uri) def get_user_agent(self): return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1] @@ -116,7 +119,7 @@ class SynapseRequest(Request): # dispatching to the handler, so that the handler # can update the servlet name in the request # metrics - requests_counter.labels(self.method, + requests_counter.labels(self.method.decode('ascii'), self.request_metrics.name).inc() @contextlib.contextmanager @@ -204,14 +207,14 @@ class SynapseRequest(Request): self.start_time = time.time() self.request_metrics = RequestMetrics() self.request_metrics.start( - self.start_time, name=servlet_name, method=self.method, + self.start_time, name=servlet_name, method=self.method.decode('ascii'), ) self.site.access_logger.info( "%s - %s - Received request: %s %s", self.getClientIP(), self.site.site_tag, - self.method, + self.method.decode('ascii'), self.get_redacted_uri() ) @@ -277,15 +280,15 @@ class SynapseRequest(Request): int(usage.db_txn_count), self.sentLength, code, - self.method, + self.method.decode('ascii'), self.get_redacted_uri(), - self.clientproto, + self.clientproto.decode('ascii', errors='replace'), user_agent, usage.evt_db_fetch_count, ) try: - self.request_metrics.stop(self.finish_time, self) + self.request_metrics.stop(self.finish_time, self.code, self.sentLength) except Exception as e: logger.warn("Failed to stop metrics: %r", e) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 550f8443f7..59900aa5d1 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -18,8 +18,11 @@ import gc import logging import os import platform +import threading import time +import six + import attr from prometheus_client import Counter, Gauge, Histogram from prometheus_client.core import REGISTRY, GaugeMetricFamily @@ -68,7 +71,7 @@ class LaterGauge(object): return if isinstance(calls, dict): - for k, v in calls.items(): + for k, v in six.iteritems(calls): g.add_metric(k, v) else: g.add_metric([], calls) @@ -87,6 +90,109 @@ class LaterGauge(object): all_gauges[self.name] = self +class InFlightGauge(object): + """Tracks number of things (e.g. requests, Measure blocks, etc) in flight + at any given time. + + Each InFlightGauge will create a metric called `<name>_total` that counts + the number of in flight blocks, as well as a metrics for each item in the + given `sub_metrics` as `<name>_<sub_metric>` which will get updated by the + callbacks. + + Args: + name (str) + desc (str) + labels (list[str]) + sub_metrics (list[str]): A list of sub metrics that the callbacks + will update. + """ + + def __init__(self, name, desc, labels, sub_metrics): + self.name = name + self.desc = desc + self.labels = labels + self.sub_metrics = sub_metrics + + # Create a class which have the sub_metrics values as attributes, which + # default to 0 on initialization. Used to pass to registered callbacks. + self._metrics_class = attr.make_class( + "_MetricsEntry", + attrs={x: attr.ib(0) for x in sub_metrics}, + slots=True, + ) + + # Counts number of in flight blocks for a given set of label values + self._registrations = {} + + # Protects access to _registrations + self._lock = threading.Lock() + + self._register_with_collector() + + def register(self, key, callback): + """Registers that we've entered a new block with labels `key`. + + `callback` gets called each time the metrics are collected. The same + value must also be given to `unregister`. + + `callback` gets called with an object that has an attribute per + sub_metric, which should be updated with the necessary values. Note that + the metrics object is shared between all callbacks registered with the + same key. + + Note that `callback` may be called on a separate thread. + """ + with self._lock: + self._registrations.setdefault(key, set()).add(callback) + + def unregister(self, key, callback): + """Registers that we've exited a block with labels `key`. + """ + + with self._lock: + self._registrations.setdefault(key, set()).discard(callback) + + def collect(self): + """Called by prometheus client when it reads metrics. + + Note: may be called by a separate thread. + """ + in_flight = GaugeMetricFamily(self.name + "_total", self.desc, labels=self.labels) + + metrics_by_key = {} + + # We copy so that we don't mutate the list while iterating + with self._lock: + keys = list(self._registrations) + + for key in keys: + with self._lock: + callbacks = set(self._registrations[key]) + + in_flight.add_metric(key, len(callbacks)) + + metrics = self._metrics_class() + metrics_by_key[key] = metrics + for callback in callbacks: + callback(metrics) + + yield in_flight + + for name in self.sub_metrics: + gauge = GaugeMetricFamily("_".join([self.name, name]), "", labels=self.labels) + for key, metrics in six.iteritems(metrics_by_key): + gauge.add_metric(key, getattr(metrics, name)) + yield gauge + + def _register_with_collector(self): + if self.name in all_gauges.keys(): + logger.warning("%s already registered, reregistering" % (self.name,)) + REGISTRY.unregister(all_gauges.pop(self.name)) + + REGISTRY.register(self) + all_gauges[self.name] = self + + # # Detailed CPU metrics # diff --git a/synapse/notifier.py b/synapse/notifier.py index 82f391481c..f1d92c1395 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -25,11 +25,7 @@ from synapse.api.errors import AuthError from synapse.handlers.presence import format_user_presence_state from synapse.metrics import LaterGauge from synapse.types import StreamToken -from synapse.util.async_helpers import ( - DeferredTimeoutError, - ObservableDeferred, - add_timeout_to_deferred, -) +from synapse.util.async_helpers import ObservableDeferred, timeout_deferred from synapse.util.logcontext import PreserveLoggingContext, run_in_background from synapse.util.logutils import log_function from synapse.util.metrics import Measure @@ -337,7 +333,7 @@ class Notifier(object): # Now we wait for the _NotifierUserStream to be told there # is a new token. listener = user_stream.new_listener(prev_token) - add_timeout_to_deferred( + listener.deferred = timeout_deferred( listener.deferred, (end_time - now) / 1000., self.hs.get_reactor(), @@ -354,7 +350,7 @@ class Notifier(object): # Update the prev_token to the current_token since nothing # has happened between the old prev_token and the current_token prev_token = current_token - except DeferredTimeoutError: + except defer.TimeoutError: break except defer.CancelledError: break @@ -559,15 +555,16 @@ class Notifier(object): if end_time <= now: break - add_timeout_to_deferred( - listener.deferred.addTimeout, - (end_time - now) / 1000., - self.hs.get_reactor(), + listener.deferred = timeout_deferred( + listener.deferred, + timeout=(end_time - now) / 1000., + reactor=self.hs.get_reactor(), ) + try: with PreserveLoggingContext(): yield listener.deferred - except DeferredTimeoutError: + except defer.TimeoutError: break except defer.CancelledError: break diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 81e18bcf7d..48abd5e4d6 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -15,6 +15,8 @@ # limitations under the License. import logging +import six + from prometheus_client import Counter from twisted.internet import defer @@ -26,6 +28,9 @@ from synapse.util.metrics import Measure from . import push_rule_evaluator, push_tools +if six.PY3: + long = int + logger = logging.getLogger(__name__) http_push_processed_counter = Counter("synapse_http_httppusher_http_pushes_processed", "") @@ -96,7 +101,7 @@ class HttpPusher(object): @defer.inlineCallbacks def on_new_notifications(self, min_stream_ordering, max_stream_ordering): - self.max_stream_ordering = max(max_stream_ordering, self.max_stream_ordering) + self.max_stream_ordering = max(max_stream_ordering, self.max_stream_ordering or 0) yield self._process() @defer.inlineCallbacks diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index bfa6df7b68..b78ce10396 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -17,10 +17,11 @@ import email.mime.multipart import email.utils import logging import time -import urllib from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText +from six.moves import urllib + import bleach import jinja2 @@ -474,7 +475,7 @@ class Mailer(object): # XXX: make r0 once API is stable return "%s_matrix/client/unstable/pushers/remove?%s" % ( self.hs.config.public_baseurl, - urllib.urlencode(params), + urllib.parse.urlencode(params), ) @@ -561,7 +562,7 @@ def _create_mxc_to_http_filter(config): return "%s_matrix/media/v1/thumbnail/%s?%s%s" % ( config.public_baseurl, serverAndMediaId, - urllib.urlencode(params), + urllib.parse.urlencode(params), fragment or "", ) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 942d7c721f..0d8de600cf 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -40,9 +40,10 @@ REQUIREMENTS = { "pynacl>=1.2.1": ["nacl>=1.2.1", "nacl.bindings"], "service_identity>=1.0.0": ["service_identity>=1.0.0"], "Twisted>=17.1.0": ["twisted>=17.1.0"], + "treq>=15.1": ["treq>=15.1"], - # We use crypto.get_elliptic_curve which is only supported in >=0.15 - "pyopenssl>=0.15": ["OpenSSL>=0.15"], + # Twisted has required pyopenssl 16.0 since about Twisted 16.6. + "pyopenssl>=16.0.0": ["OpenSSL>=16.0.0"], "pyyaml": ["yaml"], "pyasn1": ["pyasn1"], diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py index 8206a988f7..21b8c468fa 100644 --- a/synapse/replication/slave/storage/devices.py +++ b/synapse/replication/slave/storage/devices.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import six + from synapse.storage import DataStore from synapse.storage.end_to_end_keys import EndToEndKeyStore from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -21,6 +23,13 @@ from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker +def __func__(inp): + if six.PY3: + return inp + else: + return inp.__func__ + + class SlavedDeviceStore(BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedDeviceStore, self).__init__(db_conn, hs) @@ -38,14 +47,14 @@ class SlavedDeviceStore(BaseSlavedStore): "DeviceListFederationStreamChangeCache", device_list_max, ) - get_device_stream_token = DataStore.get_device_stream_token.__func__ - get_user_whose_devices_changed = DataStore.get_user_whose_devices_changed.__func__ - get_devices_by_remote = DataStore.get_devices_by_remote.__func__ - _get_devices_by_remote_txn = DataStore._get_devices_by_remote_txn.__func__ - _get_e2e_device_keys_txn = DataStore._get_e2e_device_keys_txn.__func__ - mark_as_sent_devices_by_remote = DataStore.mark_as_sent_devices_by_remote.__func__ + get_device_stream_token = __func__(DataStore.get_device_stream_token) + get_user_whose_devices_changed = __func__(DataStore.get_user_whose_devices_changed) + get_devices_by_remote = __func__(DataStore.get_devices_by_remote) + _get_devices_by_remote_txn = __func__(DataStore._get_devices_by_remote_txn) + _get_e2e_device_keys_txn = __func__(DataStore._get_e2e_device_keys_txn) + mark_as_sent_devices_by_remote = __func__(DataStore.mark_as_sent_devices_by_remote) _mark_as_sent_devices_by_remote_txn = ( - DataStore._mark_as_sent_devices_by_remote_txn.__func__ + __func__(DataStore._mark_as_sent_devices_by_remote_txn) ) count_e2e_one_time_keys = EndToEndKeyStore.__dict__["count_e2e_one_time_keys"] diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 74e892c104..5dc7b3fffc 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -590,9 +590,9 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): pending_commands = LaterGauge( "synapse_replication_tcp_protocol_pending_commands", "", - ["name", "conn_id"], + ["name"], lambda: { - (p.name, p.conn_id): len(p.pending_commands) for p in connected_connections + (p.name,): len(p.pending_commands) for p in connected_connections }, ) @@ -607,9 +607,9 @@ def transport_buffer_size(protocol): transport_send_buffer = LaterGauge( "synapse_replication_tcp_protocol_transport_send_buffer", "", - ["name", "conn_id"], + ["name"], lambda: { - (p.name, p.conn_id): transport_buffer_size(p) for p in connected_connections + (p.name,): transport_buffer_size(p) for p in connected_connections }, ) @@ -632,9 +632,9 @@ def transport_kernel_read_buffer_size(protocol, read=True): tcp_transport_kernel_send_buffer = LaterGauge( "synapse_replication_tcp_protocol_transport_kernel_send_buffer", "", - ["name", "conn_id"], + ["name"], lambda: { - (p.name, p.conn_id): transport_kernel_read_buffer_size(p, False) + (p.name,): transport_kernel_read_buffer_size(p, False) for p in connected_connections }, ) @@ -643,9 +643,9 @@ tcp_transport_kernel_send_buffer = LaterGauge( tcp_transport_kernel_read_buffer = LaterGauge( "synapse_replication_tcp_protocol_transport_kernel_read_buffer", "", - ["name", "conn_id"], + ["name"], lambda: { - (p.name, p.conn_id): transport_kernel_read_buffer_size(p, True) + (p.name,): transport_kernel_read_buffer_size(p, True) for p in connected_connections }, ) @@ -654,9 +654,9 @@ tcp_transport_kernel_read_buffer = LaterGauge( tcp_inbound_commands = LaterGauge( "synapse_replication_tcp_protocol_inbound_commands", "", - ["command", "name", "conn_id"], + ["command", "name"], lambda: { - (k[0], p.name, p.conn_id): count + (k[0], p.name,): count for p in connected_connections for k, count in iteritems(p.inbound_commands_counter) }, @@ -665,9 +665,9 @@ tcp_inbound_commands = LaterGauge( tcp_outbound_commands = LaterGauge( "synapse_replication_tcp_protocol_outbound_commands", "", - ["command", "name", "conn_id"], + ["command", "name"], lambda: { - (k[0], p.name, p.conn_id): count + (k[0], p.name,): count for p in connected_connections for k, count in iteritems(p.outbound_commands_counter) }, diff --git a/synapse/replication/tcp/streams.py b/synapse/replication/tcp/streams.py index 55fe701c5c..c1e626be3f 100644 --- a/synapse/replication/tcp/streams.py +++ b/synapse/replication/tcp/streams.py @@ -196,7 +196,7 @@ class Stream(object): ) if len(rows) >= MAX_EVENTS_BEHIND: - raise Exception("stream %s has fallen behined" % (self.NAME)) + raise Exception("stream %s has fallen behind" % (self.NAME)) else: rows = yield self.update_function( from_token, current_token, diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index ad536ab570..41534b8c2a 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -101,7 +101,7 @@ class UserRegisterServlet(ClientV1RestServlet): nonce = self.hs.get_secrets().token_hex(64) self.nonces[nonce] = int(self.reactor.seconds()) - return (200, {"nonce": nonce.encode('ascii')}) + return (200, {"nonce": nonce}) @defer.inlineCallbacks def on_POST(self, request): @@ -164,7 +164,7 @@ class UserRegisterServlet(ClientV1RestServlet): key=self.hs.config.registration_shared_secret.encode(), digestmod=hashlib.sha1, ) - want_mac.update(nonce) + want_mac.update(nonce.encode('utf8')) want_mac.update(b"\x00") want_mac.update(username) want_mac.update(b"\x00") @@ -173,7 +173,10 @@ class UserRegisterServlet(ClientV1RestServlet): want_mac.update(b"admin" if admin else b"notadmin") want_mac = want_mac.hexdigest() - if not hmac.compare_digest(want_mac, got_mac.encode('ascii')): + if not hmac.compare_digest( + want_mac.encode('ascii'), + got_mac.encode('ascii') + ): raise SynapseError(403, "HMAC incorrect") # Reuse the parts of RegisterRestServlet to reduce code duplication diff --git a/synapse/rest/client/v1/events.py b/synapse/rest/client/v1/events.py index 0f3a2e8b51..cd9b3bdbd1 100644 --- a/synapse/rest/client/v1/events.py +++ b/synapse/rest/client/v1/events.py @@ -45,20 +45,20 @@ class EventStreamRestServlet(ClientV1RestServlet): is_guest = requester.is_guest room_id = None if is_guest: - if "room_id" not in request.args: + if b"room_id" not in request.args: raise SynapseError(400, "Guest users must specify room_id param") - if "room_id" in request.args: - room_id = request.args["room_id"][0] + if b"room_id" in request.args: + room_id = request.args[b"room_id"][0].decode('ascii') pagin_config = PaginationConfig.from_request(request) timeout = EventStreamRestServlet.DEFAULT_LONGPOLL_TIME_MS - if "timeout" in request.args: + if b"timeout" in request.args: try: - timeout = int(request.args["timeout"][0]) + timeout = int(request.args[b"timeout"][0]) except ValueError: raise SynapseError(400, "timeout must be in milliseconds.") - as_client_event = "raw" not in request.args + as_client_event = b"raw" not in request.args chunk = yield self.event_stream_handler.get_stream( requester.user.to_string(), diff --git a/synapse/rest/client/v1/initial_sync.py b/synapse/rest/client/v1/initial_sync.py index fd5f85b53e..3ead75cb77 100644 --- a/synapse/rest/client/v1/initial_sync.py +++ b/synapse/rest/client/v1/initial_sync.py @@ -32,7 +32,7 @@ class InitialSyncRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request): requester = yield self.auth.get_user_by_req(request) - as_client_event = "raw" not in request.args + as_client_event = b"raw" not in request.args pagination_config = PaginationConfig.from_request(request) include_archived = parse_boolean(request, "archived", default=False) content = yield self.initial_sync_handler.snapshot_all_rooms( diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index cb85fa1436..0010699d31 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -14,10 +14,9 @@ # limitations under the License. import logging -import urllib import xml.etree.ElementTree as ET -from six.moves.urllib import parse as urlparse +from six.moves import urllib from canonicaljson import json from saml2 import BINDING_HTTP_POST, config @@ -134,7 +133,7 @@ class LoginRestServlet(ClientV1RestServlet): LoginRestServlet.SAML2_TYPE): relay_state = "" if "relay_state" in login_submission: - relay_state = "&RelayState=" + urllib.quote( + relay_state = "&RelayState=" + urllib.parse.quote( login_submission["relay_state"]) result = { "uri": "%s%s" % (self.idp_redirect_url, relay_state) @@ -366,7 +365,7 @@ class SAML2RestServlet(ClientV1RestServlet): (user_id, token) = yield handler.register_saml2(username) # Forward to the RelayState callback along with ava if 'RelayState' in request.args: - request.redirect(urllib.unquote( + request.redirect(urllib.parse.unquote( request.args['RelayState'][0]) + '?status=authenticated&access_token=' + token + '&user_id=' + user_id + '&ava=' + @@ -377,7 +376,7 @@ class SAML2RestServlet(ClientV1RestServlet): "user_id": user_id, "token": token, "ava": saml2_auth.ava})) elif 'RelayState' in request.args: - request.redirect(urllib.unquote( + request.redirect(urllib.parse.unquote( request.args['RelayState'][0]) + '?status=not_authenticated') finish_request(request) @@ -390,21 +389,22 @@ class CasRedirectServlet(ClientV1RestServlet): def __init__(self, hs): super(CasRedirectServlet, self).__init__(hs) - self.cas_server_url = hs.config.cas_server_url - self.cas_service_url = hs.config.cas_service_url + self.cas_server_url = hs.config.cas_server_url.encode('ascii') + self.cas_service_url = hs.config.cas_service_url.encode('ascii') def on_GET(self, request): args = request.args - if "redirectUrl" not in args: + if b"redirectUrl" not in args: return (400, "Redirect URL not specified for CAS auth") - client_redirect_url_param = urllib.urlencode({ - "redirectUrl": args["redirectUrl"][0] - }) - hs_redirect_url = self.cas_service_url + "/_matrix/client/api/v1/login/cas/ticket" - service_param = urllib.urlencode({ - "service": "%s?%s" % (hs_redirect_url, client_redirect_url_param) - }) - request.redirect("%s/login?%s" % (self.cas_server_url, service_param)) + client_redirect_url_param = urllib.parse.urlencode({ + b"redirectUrl": args[b"redirectUrl"][0] + }).encode('ascii') + hs_redirect_url = (self.cas_service_url + + b"/_matrix/client/api/v1/login/cas/ticket") + service_param = urllib.parse.urlencode({ + b"service": b"%s?%s" % (hs_redirect_url, client_redirect_url_param) + }).encode('ascii') + request.redirect(b"%s/login?%s" % (self.cas_server_url, service_param)) finish_request(request) @@ -422,11 +422,11 @@ class CasTicketServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request): - client_redirect_url = request.args["redirectUrl"][0] + client_redirect_url = request.args[b"redirectUrl"][0] http_client = self.hs.get_simple_http_client() uri = self.cas_server_url + "/proxyValidate" args = { - "ticket": request.args["ticket"], + "ticket": request.args[b"ticket"][0].decode('ascii'), "service": self.cas_service_url } try: @@ -471,11 +471,11 @@ class CasTicketServlet(ClientV1RestServlet): finish_request(request) def add_login_token_to_redirect_url(self, url, token): - url_parts = list(urlparse.urlparse(url)) - query = dict(urlparse.parse_qsl(url_parts[4])) + url_parts = list(urllib.parse.urlparse(url)) + query = dict(urllib.parse.parse_qsl(url_parts[4])) query.update({"loginToken": token}) - url_parts[4] = urllib.urlencode(query) - return urlparse.urlunparse(url_parts) + url_parts[4] = urllib.parse.urlencode(query).encode('ascii') + return urllib.parse.urlunparse(url_parts) def parse_cas_response(self, cas_response_body): user = None diff --git a/synapse/rest/client/v1/push_rule.py b/synapse/rest/client/v1/push_rule.py index 6e95d9bec2..9382b1f124 100644 --- a/synapse/rest/client/v1/push_rule.py +++ b/synapse/rest/client/v1/push_rule.py @@ -46,7 +46,7 @@ class PushRuleRestServlet(ClientV1RestServlet): try: priority_class = _priority_class_from_spec(spec) except InvalidRuleException as e: - raise SynapseError(400, e.message) + raise SynapseError(400, str(e)) requester = yield self.auth.get_user_by_req(request) @@ -73,7 +73,7 @@ class PushRuleRestServlet(ClientV1RestServlet): content, ) except InvalidRuleException as e: - raise SynapseError(400, e.message) + raise SynapseError(400, str(e)) before = parse_string(request, "before") if before: @@ -95,9 +95,9 @@ class PushRuleRestServlet(ClientV1RestServlet): ) self.notify_user(user_id) except InconsistentRuleException as e: - raise SynapseError(400, e.message) + raise SynapseError(400, str(e)) except RuleNotFoundException as e: - raise SynapseError(400, e.message) + raise SynapseError(400, str(e)) defer.returnValue((200, {})) @@ -142,10 +142,10 @@ class PushRuleRestServlet(ClientV1RestServlet): PushRuleRestServlet.SLIGHTLY_PEDANTIC_TRAILING_SLASH_ERROR ) - if path[0] == '': + if path[0] == b'': defer.returnValue((200, rules)) - elif path[0] == 'global': - path = path[1:] + elif path[0] == b'global': + path = [x.decode('ascii') for x in path[1:]] result = _filter_ruleset_with_path(rules['global'], path) defer.returnValue((200, result)) else: @@ -192,10 +192,10 @@ class PushRuleRestServlet(ClientV1RestServlet): def _rule_spec_from_path(path): if len(path) < 2: raise UnrecognizedRequestError() - if path[0] != 'pushrules': + if path[0] != b'pushrules': raise UnrecognizedRequestError() - scope = path[1] + scope = path[1].decode('ascii') path = path[2:] if scope != 'global': raise UnrecognizedRequestError() @@ -203,13 +203,13 @@ def _rule_spec_from_path(path): if len(path) == 0: raise UnrecognizedRequestError() - template = path[0] + template = path[0].decode('ascii') path = path[1:] if len(path) == 0 or len(path[0]) == 0: raise UnrecognizedRequestError() - rule_id = path[0] + rule_id = path[0].decode('ascii') spec = { 'scope': scope, @@ -220,7 +220,7 @@ def _rule_spec_from_path(path): path = path[1:] if len(path) > 0 and len(path[0]) > 0: - spec['attr'] = path[0] + spec['attr'] = path[0].decode('ascii') return spec diff --git a/synapse/rest/client/v1/pusher.py b/synapse/rest/client/v1/pusher.py index 182a68b1e2..b84f0260f2 100644 --- a/synapse/rest/client/v1/pusher.py +++ b/synapse/rest/client/v1/pusher.py @@ -59,7 +59,7 @@ class PushersRestServlet(ClientV1RestServlet): ] for p in pushers: - for k, v in p.items(): + for k, v in list(p.items()): if k not in allowed_keys: del p[k] @@ -126,7 +126,7 @@ class PushersSetRestServlet(ClientV1RestServlet): profile_tag=content.get('profile_tag', ""), ) except PusherConfigException as pce: - raise SynapseError(400, "Config Error: " + pce.message, + raise SynapseError(400, "Config Error: " + str(pce), errcode=Codes.MISSING_PARAM) self.notifier.on_new_replication_data() diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 976d98387d..663934efd0 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -207,7 +207,7 @@ class RoomSendEventRestServlet(ClientV1RestServlet): "sender": requester.user.to_string(), } - if 'ts' in request.args and requester.app_service: + if b'ts' in request.args and requester.app_service: event_dict['origin_server_ts'] = parse_integer(request, "ts", 0) event = yield self.event_creation_hander.create_and_send_nonmember_event( @@ -255,7 +255,9 @@ class JoinRoomAliasServlet(ClientV1RestServlet): if RoomID.is_valid(room_identifier): room_id = room_identifier try: - remote_room_hosts = request.args["server_name"] + remote_room_hosts = [ + x.decode('ascii') for x in request.args[b"server_name"] + ] except Exception: remote_room_hosts = None elif RoomAlias.is_valid(room_identifier): @@ -461,10 +463,10 @@ class RoomMessageListRestServlet(ClientV1RestServlet): pagination_config = PaginationConfig.from_request( request, default_limit=10, ) - as_client_event = "raw" not in request.args - filter_bytes = parse_string(request, "filter") + as_client_event = b"raw" not in request.args + filter_bytes = parse_string(request, b"filter", encoding=None) if filter_bytes: - filter_json = urlparse.unquote(filter_bytes).decode("UTF-8") + filter_json = urlparse.unquote(filter_bytes.decode("UTF-8")) event_filter = Filter(json.loads(filter_json)) else: event_filter = None @@ -560,7 +562,7 @@ class RoomEventContextServlet(ClientV1RestServlet): # picking the API shape for symmetry with /messages filter_bytes = parse_string(request, "filter") if filter_bytes: - filter_json = urlparse.unquote(filter_bytes).decode("UTF-8") + filter_json = urlparse.unquote(filter_bytes) event_filter = Filter(json.loads(filter_json)) else: event_filter = None diff --git a/synapse/rest/client/v1/voip.py b/synapse/rest/client/v1/voip.py index 62f4c3d93e..53da905eea 100644 --- a/synapse/rest/client/v1/voip.py +++ b/synapse/rest/client/v1/voip.py @@ -42,7 +42,11 @@ class VoipRestServlet(ClientV1RestServlet): expiry = (self.hs.get_clock().time_msec() + userLifetime) / 1000 username = "%d:%s" % (expiry, requester.user.to_string()) - mac = hmac.new(turnSecret, msg=username, digestmod=hashlib.sha1) + mac = hmac.new( + turnSecret.encode(), + msg=username.encode(), + digestmod=hashlib.sha1 + ) # We need to use standard padded base64 encoding here # encode_base64 because we need to add the standard padding to get the # same result as the TURN server. diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index 1275baa1ba..0251146722 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -25,6 +25,7 @@ from synapse.api.errors import SynapseError from synapse.api.filtering import DEFAULT_FILTER_COLLECTION, FilterCollection from synapse.events.utils import ( format_event_for_client_v2_without_room_id, + format_event_raw, serialize_event, ) from synapse.handlers.presence import format_user_presence_state @@ -88,7 +89,7 @@ class SyncRestServlet(RestServlet): @defer.inlineCallbacks def on_GET(self, request): - if "from" in request.args: + if b"from" in request.args: # /events used to use 'from', but /sync uses 'since'. # Lets be helpful and whine if we see a 'from'. raise SynapseError( @@ -175,17 +176,28 @@ class SyncRestServlet(RestServlet): @staticmethod def encode_response(time_now, sync_result, access_token_id, filter): + if filter.event_format == 'client': + event_formatter = format_event_for_client_v2_without_room_id + elif filter.event_format == 'federation': + event_formatter = format_event_raw + else: + raise Exception("Unknown event format %s" % (filter.event_format, )) + joined = SyncRestServlet.encode_joined( - sync_result.joined, time_now, access_token_id, filter.event_fields + sync_result.joined, time_now, access_token_id, + filter.event_fields, + event_formatter, ) invited = SyncRestServlet.encode_invited( sync_result.invited, time_now, access_token_id, + event_formatter, ) archived = SyncRestServlet.encode_archived( sync_result.archived, time_now, access_token_id, filter.event_fields, + event_formatter, ) return { @@ -228,7 +240,7 @@ class SyncRestServlet(RestServlet): } @staticmethod - def encode_joined(rooms, time_now, token_id, event_fields): + def encode_joined(rooms, time_now, token_id, event_fields, event_formatter): """ Encode the joined rooms in a sync result @@ -240,7 +252,9 @@ class SyncRestServlet(RestServlet): token_id(int): ID of the user's auth token - used for namespacing of transaction IDs event_fields(list<str>): List of event fields to include. If empty, - all fields will be returned. + all fields will be returned. + event_formatter (func[dict]): function to convert from federation format + to client format Returns: dict[str, dict[str, object]]: the joined rooms list, in our response format @@ -248,13 +262,14 @@ class SyncRestServlet(RestServlet): joined = {} for room in rooms: joined[room.room_id] = SyncRestServlet.encode_room( - room, time_now, token_id, only_fields=event_fields + room, time_now, token_id, joined=True, only_fields=event_fields, + event_formatter=event_formatter, ) return joined @staticmethod - def encode_invited(rooms, time_now, token_id): + def encode_invited(rooms, time_now, token_id, event_formatter): """ Encode the invited rooms in a sync result @@ -264,7 +279,9 @@ class SyncRestServlet(RestServlet): time_now(int): current time - used as a baseline for age calculations token_id(int): ID of the user's auth token - used for namespacing - of transaction IDs + of transaction IDs + event_formatter (func[dict]): function to convert from federation format + to client format Returns: dict[str, dict[str, object]]: the invited rooms list, in our @@ -274,7 +291,7 @@ class SyncRestServlet(RestServlet): for room in rooms: invite = serialize_event( room.invite, time_now, token_id=token_id, - event_format=format_event_for_client_v2_without_room_id, + event_format=event_formatter, is_invite=True, ) unsigned = dict(invite.get("unsigned", {})) @@ -288,7 +305,7 @@ class SyncRestServlet(RestServlet): return invited @staticmethod - def encode_archived(rooms, time_now, token_id, event_fields): + def encode_archived(rooms, time_now, token_id, event_fields, event_formatter): """ Encode the archived rooms in a sync result @@ -300,7 +317,9 @@ class SyncRestServlet(RestServlet): token_id(int): ID of the user's auth token - used for namespacing of transaction IDs event_fields(list<str>): List of event fields to include. If empty, - all fields will be returned. + all fields will be returned. + event_formatter (func[dict]): function to convert from federation format + to client format Returns: dict[str, dict[str, object]]: The invited rooms list, in our response format @@ -308,13 +327,18 @@ class SyncRestServlet(RestServlet): joined = {} for room in rooms: joined[room.room_id] = SyncRestServlet.encode_room( - room, time_now, token_id, joined=False, only_fields=event_fields + room, time_now, token_id, joined=False, + only_fields=event_fields, + event_formatter=event_formatter, ) return joined @staticmethod - def encode_room(room, time_now, token_id, joined=True, only_fields=None): + def encode_room( + room, time_now, token_id, joined, + only_fields, event_formatter, + ): """ Args: room (JoinedSyncResult|ArchivedSyncResult): sync result for a @@ -326,14 +350,15 @@ class SyncRestServlet(RestServlet): joined (bool): True if the user is joined to this room - will mean we handle ephemeral events only_fields(list<str>): Optional. The list of event fields to include. + event_formatter (func[dict]): function to convert from federation format + to client format Returns: dict[str, object]: the room, encoded in our response format """ def serialize(event): - # TODO(mjark): Respect formatting requirements in the filter. return serialize_event( event, time_now, token_id=token_id, - event_format=format_event_for_client_v2_without_room_id, + event_format=event_formatter, only_event_fields=only_fields, ) diff --git a/synapse/rest/client/v2_alpha/thirdparty.py b/synapse/rest/client/v2_alpha/thirdparty.py index d9d379182e..b9b5d07677 100644 --- a/synapse/rest/client/v2_alpha/thirdparty.py +++ b/synapse/rest/client/v2_alpha/thirdparty.py @@ -79,7 +79,7 @@ class ThirdPartyUserServlet(RestServlet): yield self.auth.get_user_by_req(request, allow_guest=True) fields = request.args - fields.pop("access_token", None) + fields.pop(b"access_token", None) results = yield self.appservice_handler.query_3pe( ThirdPartyEntityKind.USER, protocol, fields @@ -102,7 +102,7 @@ class ThirdPartyLocationServlet(RestServlet): yield self.auth.get_user_by_req(request, allow_guest=True) fields = request.args - fields.pop("access_token", None) + fields.pop(b"access_token", None) results = yield self.appservice_handler.query_3pe( ThirdPartyEntityKind.LOCATION, protocol, fields diff --git a/synapse/rest/key/v1/server_key_resource.py b/synapse/rest/key/v1/server_key_resource.py index b9ee6e1c13..38eb2ee23f 100644 --- a/synapse/rest/key/v1/server_key_resource.py +++ b/synapse/rest/key/v1/server_key_resource.py @@ -88,5 +88,5 @@ class LocalKey(Resource): ) def getChild(self, name, request): - if name == '': + if name == b'': return self diff --git a/synapse/rest/key/v2/__init__.py b/synapse/rest/key/v2/__init__.py index 3491fd2118..cb5abcf826 100644 --- a/synapse/rest/key/v2/__init__.py +++ b/synapse/rest/key/v2/__init__.py @@ -22,5 +22,5 @@ from .remote_key_resource import RemoteKey class KeyApiV2Resource(Resource): def __init__(self, hs): Resource.__init__(self) - self.putChild("server", LocalKey(hs)) - self.putChild("query", RemoteKey(hs)) + self.putChild(b"server", LocalKey(hs)) + self.putChild(b"query", RemoteKey(hs)) diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py index 7d67e4b064..eb8782aa6e 100644 --- a/synapse/rest/key/v2/remote_key_resource.py +++ b/synapse/rest/key/v2/remote_key_resource.py @@ -103,7 +103,7 @@ class RemoteKey(Resource): def async_render_GET(self, request): if len(request.postpath) == 1: server, = request.postpath - query = {server: {}} + query = {server.decode('ascii'): {}} elif len(request.postpath) == 2: server, key_id = request.postpath minimum_valid_until_ts = parse_integer( @@ -112,11 +112,12 @@ class RemoteKey(Resource): arguments = {} if minimum_valid_until_ts is not None: arguments["minimum_valid_until_ts"] = minimum_valid_until_ts - query = {server: {key_id: arguments}} + query = {server.decode('ascii'): {key_id.decode('ascii'): arguments}} else: raise SynapseError( 404, "Not found %r" % request.postpath, Codes.NOT_FOUND ) + yield self.query_keys(request, query, query_remote_on_cache_miss=True) def render_POST(self, request): @@ -135,6 +136,7 @@ class RemoteKey(Resource): @defer.inlineCallbacks def query_keys(self, request, query, query_remote_on_cache_miss=False): logger.info("Handling query for keys %r", query) + store_queries = [] for server_name, key_ids in query.items(): if ( diff --git a/synapse/rest/media/v0/content_repository.py b/synapse/rest/media/v0/content_repository.py index f255f2883f..5a426ff2f6 100644 --- a/synapse/rest/media/v0/content_repository.py +++ b/synapse/rest/media/v0/content_repository.py @@ -56,7 +56,7 @@ class ContentRepoResource(resource.Resource): # servers. # TODO: A little crude here, we could do this better. - filename = request.path.split('/')[-1] + filename = request.path.decode('ascii').split('/')[-1] # be paranoid filename = re.sub("[^0-9A-z.-_]", "", filename) @@ -78,7 +78,7 @@ class ContentRepoResource(resource.Resource): # select private. don't bother setting Expires as all our matrix # clients are smart enough to be happy with Cache-Control (right?) request.setHeader( - "Cache-Control", "public,max-age=86400,s-maxage=86400" + b"Cache-Control", b"public,max-age=86400,s-maxage=86400" ) d = FileSender().beginFileTransfer(f, request) diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 65f4bd2910..76e479afa3 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -15,9 +15,8 @@ import logging import os -import urllib -from six.moves.urllib import parse as urlparse +from six.moves import urllib from twisted.internet import defer from twisted.protocols.basic import FileSender @@ -35,10 +34,15 @@ def parse_media_id(request): # This allows users to append e.g. /test.png to the URL. Useful for # clients that parse the URL to see content type. server_name, media_id = request.postpath[:2] + + if isinstance(server_name, bytes): + server_name = server_name.decode('utf-8') + media_id = media_id.decode('utf8') + file_name = None if len(request.postpath) > 2: try: - file_name = urlparse.unquote(request.postpath[-1]).decode("utf-8") + file_name = urllib.parse.unquote(request.postpath[-1].decode("utf-8")) except UnicodeDecodeError: pass return server_name, media_id, file_name @@ -93,22 +97,18 @@ def add_file_headers(request, media_type, file_size, upload_name): file_size (int): Size in bytes of the media, if known. upload_name (str): The name of the requested file, if any. """ + def _quote(x): + return urllib.parse.quote(x.encode("utf-8")) + request.setHeader(b"Content-Type", media_type.encode("UTF-8")) if upload_name: if is_ascii(upload_name): - request.setHeader( - b"Content-Disposition", - b"inline; filename=%s" % ( - urllib.quote(upload_name.encode("utf-8")), - ), - ) + disposition = ("inline; filename=%s" % (_quote(upload_name),)).encode("ascii") else: - request.setHeader( - b"Content-Disposition", - b"inline; filename*=utf-8''%s" % ( - urllib.quote(upload_name.encode("utf-8")), - ), - ) + disposition = ( + "inline; filename*=utf-8''%s" % (_quote(upload_name),)).encode("ascii") + + request.setHeader(b"Content-Disposition", disposition) # cache for at least a day. # XXX: we might want to turn this off for data we don't want to diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index fbfa85f74f..ca90964d1d 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -47,12 +47,12 @@ class DownloadResource(Resource): def _async_render_GET(self, request): set_cors_headers(request) request.setHeader( - "Content-Security-Policy", - "default-src 'none';" - " script-src 'none';" - " plugin-types application/pdf;" - " style-src 'unsafe-inline';" - " object-src 'self';" + b"Content-Security-Policy", + b"default-src 'none';" + b" script-src 'none';" + b" plugin-types application/pdf;" + b" style-src 'unsafe-inline';" + b" object-src 'self';" ) server_name, media_id, name = parse_media_id(request) if server_name == self.server_name: diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 241c972070..a828ff4438 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -20,7 +20,7 @@ import logging import os import shutil -from six import iteritems +from six import PY3, iteritems from six.moves.urllib import parse as urlparse import twisted.internet.error @@ -397,13 +397,13 @@ class MediaRepository(object): yield finish() - media_type = headers["Content-Type"][0] + media_type = headers[b"Content-Type"][0].decode('ascii') time_now_ms = self.clock.time_msec() - content_disposition = headers.get("Content-Disposition", None) + content_disposition = headers.get(b"Content-Disposition", None) if content_disposition: - _, params = cgi.parse_header(content_disposition[0],) + _, params = cgi.parse_header(content_disposition[0].decode('ascii'),) upload_name = None # First check if there is a valid UTF-8 filename @@ -419,9 +419,13 @@ class MediaRepository(object): upload_name = upload_name_ascii if upload_name: - upload_name = urlparse.unquote(upload_name) + if PY3: + upload_name = urlparse.unquote(upload_name) + else: + upload_name = urlparse.unquote(upload_name.encode('ascii')) try: - upload_name = upload_name.decode("utf-8") + if isinstance(upload_name, bytes): + upload_name = upload_name.decode("utf-8") except UnicodeDecodeError: upload_name = None else: @@ -755,14 +759,15 @@ class MediaRepositoryResource(Resource): Resource.__init__(self) media_repo = hs.get_media_repository() - self.putChild("upload", UploadResource(hs, media_repo)) - self.putChild("download", DownloadResource(hs, media_repo)) - self.putChild("thumbnail", ThumbnailResource( + + self.putChild(b"upload", UploadResource(hs, media_repo)) + self.putChild(b"download", DownloadResource(hs, media_repo)) + self.putChild(b"thumbnail", ThumbnailResource( hs, media_repo, media_repo.media_storage, )) - self.putChild("identicon", IdenticonResource()) + self.putChild(b"identicon", IdenticonResource()) if hs.config.url_preview_enabled: - self.putChild("preview_url", PreviewUrlResource( + self.putChild(b"preview_url", PreviewUrlResource( hs, media_repo, media_repo.media_storage, )) - self.putChild("config", MediaConfigResource(hs)) + self.putChild(b"config", MediaConfigResource(hs)) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 778ef97337..cad2dec33a 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -261,7 +261,7 @@ class PreviewUrlResource(Resource): logger.debug("Calculated OG for %s as %s" % (url, og)) - jsonog = json.dumps(og) + jsonog = json.dumps(og).encode('utf8') # store OG in history-aware DB cache yield self.store.store_url_cache( @@ -301,20 +301,20 @@ class PreviewUrlResource(Resource): logger.warn("Error downloading %s: %r", url, e) raise SynapseError( 500, "Failed to download content: %s" % ( - traceback.format_exception_only(sys.exc_type, e), + traceback.format_exception_only(sys.exc_info()[0], e), ), Codes.UNKNOWN, ) yield finish() try: - if "Content-Type" in headers: - media_type = headers["Content-Type"][0] + if b"Content-Type" in headers: + media_type = headers[b"Content-Type"][0].decode('ascii') else: media_type = "application/octet-stream" time_now_ms = self.clock.time_msec() - content_disposition = headers.get("Content-Disposition", None) + content_disposition = headers.get(b"Content-Disposition", None) if content_disposition: _, params = cgi.parse_header(content_disposition[0],) download_name = None diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py index 808194236a..cfb687cb53 100644 --- a/synapse/storage/directory.py +++ b/synapse/storage/directory.py @@ -75,7 +75,6 @@ class DirectoryWorkerStore(SQLBaseStore): }, retcol="creator", desc="get_room_alias_creator", - allow_none=True ) @cached(max_entries=5000) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 8bf87f38f7..e7487311ce 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -930,6 +930,10 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore ) self._invalidate_cache_and_stream( + txn, self.get_room_summary, (room_id,) + ) + + self._invalidate_cache_and_stream( txn, self.get_current_state_ids, (room_id,) ) @@ -1886,20 +1890,6 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore ")" ) - # create an index on should_delete because later we'll be looking for - # the should_delete / shouldn't_delete subsets - txn.execute( - "CREATE INDEX events_to_purge_should_delete" - " ON events_to_purge(should_delete)", - ) - - # We do joins against events_to_purge for e.g. calculating state - # groups to purge, etc., so lets make an index. - txn.execute( - "CREATE INDEX events_to_purge_id" - " ON events_to_purge(event_id)", - ) - # First ensure that we're not about to delete all the forward extremeties txn.execute( "SELECT e.event_id, e.depth FROM events as e " @@ -1926,19 +1916,45 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore should_delete_params = () if not delete_local_events: should_delete_expr += " AND event_id NOT LIKE ?" - should_delete_params += ("%:" + self.hs.hostname, ) + + # We include the parameter twice since we use the expression twice + should_delete_params += ( + "%:" + self.hs.hostname, + "%:" + self.hs.hostname, + ) should_delete_params += (room_id, token.topological) + # Note that we insert events that are outliers and aren't going to be + # deleted, as nothing will happen to them. txn.execute( "INSERT INTO events_to_purge" " SELECT event_id, %s" " FROM events AS e LEFT JOIN state_events USING (event_id)" - " WHERE e.room_id = ? AND topological_ordering < ?" % ( + " WHERE (NOT outlier OR (%s)) AND e.room_id = ? AND topological_ordering < ?" + % ( + should_delete_expr, should_delete_expr, ), should_delete_params, ) + + # We create the indices *after* insertion as that's a lot faster. + + # create an index on should_delete because later we'll be looking for + # the should_delete / shouldn't_delete subsets + txn.execute( + "CREATE INDEX events_to_purge_should_delete" + " ON events_to_purge(should_delete)", + ) + + # We do joins against events_to_purge for e.g. calculating state + # groups to purge, etc., so lets make an index. + txn.execute( + "CREATE INDEX events_to_purge_id" + " ON events_to_purge(event_id)", + ) + txn.execute( "SELECT event_id, should_delete FROM events_to_purge" ) diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py index f547977600..a1331c1a61 100644 --- a/synapse/storage/keys.py +++ b/synapse/storage/keys.py @@ -134,6 +134,7 @@ class KeyStore(SQLBaseStore): """ key_id = "%s:%s" % (verify_key.alg, verify_key.version) + # XXX fix this to not need a lock (#3819) def _txn(txn): self._simple_upsert_txn( txn, diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index c7899d7fd2..59580949f1 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -147,6 +147,23 @@ class MonthlyActiveUsersStore(SQLBaseStore): return self.runInteraction("count_users", _count_users) @defer.inlineCallbacks + def get_registered_reserved_users_count(self): + """Of the reserved threepids defined in config, how many are associated + with registered users? + + Returns: + Defered[int]: Number of real reserved users + """ + count = 0 + for tp in self.hs.config.mau_limits_reserved_threepids: + user_id = yield self.hs.get_datastore().get_user_id_by_threepid( + tp["medium"], tp["address"] + ) + if user_id: + count = count + 1 + defer.returnValue(count) + + @defer.inlineCallbacks def upsert_monthly_active_user(self, user_id): """ Updates or inserts monthly active user member @@ -199,10 +216,14 @@ class MonthlyActiveUsersStore(SQLBaseStore): Args: user_id(str): the user_id to query """ + if self.hs.config.limit_usage_by_mau: + # Trial users and guests should not be included as part of MAU group + is_guest = yield self.is_guest(user_id) + if is_guest: + return is_trial = yield self.is_trial_user(user_id) if is_trial: - # we don't track trial users in the MAU table. return last_seen_timestamp = yield self.user_last_seen_monthly_active(user_id) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 9b4e6d6aa8..0707f9a86a 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -51,6 +51,12 @@ ProfileInfo = namedtuple( "ProfileInfo", ("avatar_url", "display_name") ) +# "members" points to a truncated list of (user_id, event_id) tuples for users of +# a given membership type, suitable for use in calculating heroes for a room. +# "count" points to the total numberr of users of a given membership type. +MemberSummary = namedtuple( + "MemberSummary", ("members", "count") +) _MEMBERSHIP_PROFILE_UPDATE_NAME = "room_membership_profile_update" @@ -82,6 +88,65 @@ class RoomMemberWorkerStore(EventsWorkerStore): return [to_ascii(r[0]) for r in txn] return self.runInteraction("get_users_in_room", f) + @cached(max_entries=100000) + def get_room_summary(self, room_id): + """ Get the details of a room roughly suitable for use by the room + summary extension to /sync. Useful when lazy loading room members. + Args: + room_id (str): The room ID to query + Returns: + Deferred[dict[str, MemberSummary]: + dict of membership states, pointing to a MemberSummary named tuple. + """ + + def _get_room_summary_txn(txn): + # first get counts. + # We do this all in one transaction to keep the cache small. + # FIXME: get rid of this when we have room_stats + sql = """ + SELECT count(*), m.membership FROM room_memberships as m + INNER JOIN current_state_events as c + ON m.event_id = c.event_id + AND m.room_id = c.room_id + AND m.user_id = c.state_key + WHERE c.type = 'm.room.member' AND c.room_id = ? + GROUP BY m.membership + """ + + txn.execute(sql, (room_id,)) + res = {} + for count, membership in txn: + summary = res.setdefault(to_ascii(membership), MemberSummary([], count)) + + # we order by membership and then fairly arbitrarily by event_id so + # heroes are consistent + sql = """ + SELECT m.user_id, m.membership, m.event_id + FROM room_memberships as m + INNER JOIN current_state_events as c + ON m.event_id = c.event_id + AND m.room_id = c.room_id + AND m.user_id = c.state_key + WHERE c.type = 'm.room.member' AND c.room_id = ? + ORDER BY + CASE m.membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC, + m.event_id ASC + LIMIT ? + """ + + # 6 is 5 (number of heroes) plus 1, in case one of them is the calling user. + txn.execute(sql, (room_id, Membership.JOIN, Membership.INVITE, 6)) + for user_id, membership, event_id in txn: + summary = res[to_ascii(membership)] + # we will always have a summary for this membership type at this + # point given the summary currently contains the counts. + members = summary.members + members.append((to_ascii(user_id), to_ascii(event_id))) + + return res + + return self.runInteraction("get_room_summary", _get_room_summary_txn) + @cached() def get_invited_rooms_for_user(self, user_id): """ Get all the rooms the user is invited to diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py index 9b3f2f4b96..ec7b2c9672 100644 --- a/synapse/util/async_helpers.py +++ b/synapse/util/async_helpers.py @@ -374,29 +374,25 @@ class ReadWriteLock(object): defer.returnValue(_ctx_manager()) -class DeferredTimeoutError(Exception): - """ - This error is raised by default when a L{Deferred} times out. - """ - +def _cancelled_to_timed_out_error(value, timeout): + if isinstance(value, failure.Failure): + value.trap(CancelledError) + raise defer.TimeoutError(timeout, "Deferred") + return value -def add_timeout_to_deferred(deferred, timeout, reactor, on_timeout_cancel=None): - """ - Add a timeout to a deferred by scheduling it to be cancelled after - timeout seconds. - This is essentially a backport of deferred.addTimeout, which was introduced - in twisted 16.5. +def timeout_deferred(deferred, timeout, reactor, on_timeout_cancel=None): + """The in built twisted `Deferred.addTimeout` fails to time out deferreds + that have a canceller that throws exceptions. This method creates a new + deferred that wraps and times out the given deferred, correctly handling + the case where the given deferred's canceller throws. - If the deferred gets timed out, it errbacks with a DeferredTimeoutError, - unless a cancelable function was passed to its initialization or unless - a different on_timeout_cancel callable is provided. + NOTE: Unlike `Deferred.addTimeout`, this function returns a new deferred Args: - deferred (defer.Deferred): deferred to be timed out - timeout (Number): seconds to time out after - reactor (twisted.internet.reactor): the Twisted reactor to use - + deferred (Deferred) + timeout (float): Timeout in seconds + reactor (twisted.internet.reactor): The twisted reactor to use on_timeout_cancel (callable): A callable which is called immediately after the deferred times out, and not if this deferred is otherwise cancelled before the timeout. @@ -406,13 +402,26 @@ def add_timeout_to_deferred(deferred, timeout, reactor, on_timeout_cancel=None): the timeout. The default callable (if none is provided) will translate a - CancelledError Failure into a DeferredTimeoutError. + CancelledError Failure into a defer.TimeoutError. + + Returns: + Deferred """ + + new_d = defer.Deferred() + timed_out = [False] def time_it_out(): timed_out[0] = True - deferred.cancel() + + try: + deferred.cancel() + except: # noqa: E722, if we throw any exception it'll break time outs + logger.exception("Canceller failed during timeout") + + if not new_d.called: + new_d.errback(defer.TimeoutError(timeout, "Deferred")) delayed_call = reactor.callLater(timeout, time_it_out) @@ -432,9 +441,14 @@ def add_timeout_to_deferred(deferred, timeout, reactor, on_timeout_cancel=None): deferred.addBoth(cancel_timeout) + def success_cb(val): + if not new_d.called: + new_d.callback(val) -def _cancelled_to_timed_out_error(value, timeout): - if isinstance(value, failure.Failure): - value.trap(CancelledError) - raise DeferredTimeoutError(timeout, "Deferred") - return value + def failure_cb(val): + if not new_d.called: + new_d.errback(val) + + deferred.addCallbacks(success_cb, failure_cb) + + return new_d diff --git a/synapse/util/manhole.py b/synapse/util/manhole.py index 14be3c7396..8d0f2a8918 100644 --- a/synapse/util/manhole.py +++ b/synapse/util/manhole.py @@ -19,22 +19,40 @@ from twisted.conch.ssh.keys import Key from twisted.cred import checkers, portal PUBLIC_KEY = ( - "ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAGEArzJx8OYOnJmzf4tfBEvLi8DVPrJ3/c9k2I/Az" - "64fxjHf9imyRJbixtQhlH9lfNjUIx+4LmrJH5QNRsFporcHDKOTwTTYLh5KmRpslkYHRivcJS" - "kbh/C+BR3utDS555mV" + "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDHhGATaW4KhE23+7nrH4jFx3yLq9OjaEs5" + "XALqeK+7385NlLja3DE/DO9mGhnd9+bAy39EKT3sTV6+WXQ4yD0TvEEyUEMtjWkSEm6U32+C" + "DaS3TW/vPBUMeJQwq+Ydcif1UlnpXrDDTamD0AU9VaEvHq+3HAkipqn0TGpKON6aqk4vauDx" + "oXSsV5TXBVrxP/y7HpMOpU4GUWsaaacBTKKNnUaQB4UflvydaPJUuwdaCUJGTMjbhWrjVfK+" + "jslseSPxU6XvrkZMyCr4znxvuDxjMk1RGIdO7v+rbBMLEgqtSMNqJbYeVCnj2CFgc3fcTcld" + "X2uOJDrJb/WRlHulthCh" ) PRIVATE_KEY = """-----BEGIN RSA PRIVATE KEY----- -MIIByAIBAAJhAK8ycfDmDpyZs3+LXwRLy4vA1T6yd/3PZNiPwM+uH8Yx3/YpskSW -4sbUIZR/ZXzY1CMfuC5qyR+UDUbBaaK3Bwyjk8E02C4eSpkabJZGB0Yr3CUpG4fw -vgUd7rQ0ueeZlQIBIwJgbh+1VZfr7WftK5lu7MHtqE1S1vPWZQYE3+VUn8yJADyb -Z4fsZaCrzW9lkIqXkE3GIY+ojdhZhkO1gbG0118sIgphwSWKRxK0mvh6ERxKqIt1 -xJEJO74EykXZV4oNJ8sjAjEA3J9r2ZghVhGN6V8DnQrTk24Td0E8hU8AcP0FVP+8 -PQm/g/aXf2QQkQT+omdHVEJrAjEAy0pL0EBH6EVS98evDCBtQw22OZT52qXlAwZ2 -gyTriKFVoqjeEjt3SZKKqXHSApP/AjBLpF99zcJJZRq2abgYlf9lv1chkrWqDHUu -DZttmYJeEfiFBBavVYIF1dOlZT0G8jMCMBc7sOSZodFnAiryP+Qg9otSBjJ3bQML -pSTqy7c3a2AScC/YyOwkDaICHnnD3XyjMwIxALRzl0tQEKMXs6hH8ToUdlLROCrP -EhQ0wahUTCk1gKA4uPD6TMTChavbh4K63OvbKg== +MIIEpQIBAAKCAQEAx4RgE2luCoRNt/u56x+Ixcd8i6vTo2hLOVwC6nivu9/OTZS4 +2twxPwzvZhoZ3ffmwMt/RCk97E1evll0OMg9E7xBMlBDLY1pEhJulN9vgg2kt01v +7zwVDHiUMKvmHXIn9VJZ6V6ww02pg9AFPVWhLx6vtxwJIqap9ExqSjjemqpOL2rg +8aF0rFeU1wVa8T/8ux6TDqVOBlFrGmmnAUyijZ1GkAeFH5b8nWjyVLsHWglCRkzI +24Vq41Xyvo7JbHkj8VOl765GTMgq+M58b7g8YzJNURiHTu7/q2wTCxIKrUjDaiW2 +HlQp49ghYHN33E3JXV9rjiQ6yW/1kZR7pbYQoQIDAQABAoIBAQC8KJ0q8Wzzwh5B +esa1dQHZ8+4DEsL/Amae66VcVwD0X3cCN1W2IZ7X5W0Ij2kBqr8V51RYhcR+S+Ek +BtzSiBUBvbKGrqcMGKaUgomDIMzai99hd0gvCCyZnEW1OQhFkNkaRNXCfqiZJ27M +fqvSUiU2eOwh9fCvmxoA6Of8o3FbzcJ+1GMcobWRllDtLmj6lgVbDzuA+0jC5daB +9Tj1pBzu3wn3ufxiS+gBnJ+7NcXH3E73lqCcPa2ufbZ1haxfiGCnRIhFXuQDgxFX +vKdEfDgtvas6r1ahGbc+b/q8E8fZT7cABuIU4yfOORK+MhpyWbvoyyzuVGKj3PKt +KSPJu5CZAoGBAOkoJfAVyYteqKcmGTanGqQnAY43CaYf6GdSPX/jg+JmKZg0zqMC +jWZUtPb93i+jnOInbrnuHOiHAxI8wmhEPed28H2lC/LU8PzlqFkZXKFZ4vLOhhRB +/HeHCFIDosPFlohWi3b+GAjD7sXgnIuGmnXWe2ea/TS3yersifDEoKKjAoGBANsQ +gJX2cJv1c3jhdgcs8vAt5zIOKcCLTOr/QPmVf/kxjNgndswcKHwsxE/voTO9q+TF +v/6yCSTxAdjuKz1oIYWgi/dZo82bBKWxNRpgrGviU3/zwxiHlyIXUhzQu78q3VS/ +7S1XVbc7qMV++XkYKHPVD+nVG/gGzFxumX7MLXfrAoGBAJit9cn2OnjNj9uFE1W6 +r7N254ndeLAUjPe73xH0RtTm2a4WRopwjW/JYIetTuYbWgyujc+robqTTuuOZjAp +H/CG7o0Ym251CypQqaFO/l2aowclPp/dZhpPjp9GSjuxFBZLtiBB3DNBOwbRQzIK +/vLTdRQvZkgzYkI4i0vjNt3JAoGBANP8HSKBLymMlShlrSx2b8TB9tc2Y2riohVJ +2ttqs0M2kt/dGJWdrgOz4mikL+983Olt/0P9juHDoxEEMK2kpcPEv40lnmBpYU7h +s8yJvnBLvJe2EJYdJ8AipyAhUX1FgpbvfxmASP8eaUxsegeXvBWTGWojAoS6N2o+ +0KSl+l3vAoGAFqm0gO9f/Q1Se60YQd4l2PZeMnJFv0slpgHHUwegmd6wJhOD7zJ1 +CkZcXwiv7Nog7AI9qKJEUXLjoqL+vJskBzSOqU3tcd670YQMi1aXSXJqYE202K7o +EddTrx3TNpr1D5m/f+6mnXWrc8u9y1+GNx9yz889xMjIBTBI9KqaaOs= -----END RSA PRIVATE KEY-----""" diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 97f1267380..4b4ac5f6c7 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -20,6 +20,7 @@ from prometheus_client import Counter from twisted.internet import defer +from synapse.metrics import InFlightGauge from synapse.util.logcontext import LoggingContext logger = logging.getLogger(__name__) @@ -45,6 +46,13 @@ block_db_txn_duration = Counter( block_db_sched_duration = Counter( "synapse_util_metrics_block_db_sched_duration_seconds", "", ["block_name"]) +# Tracks the number of blocks currently active +in_flight = InFlightGauge( + "synapse_util_metrics_block_in_flight", "", + labels=["block_name"], + sub_metrics=["real_time_max", "real_time_sum"], +) + def measure_func(name): def wrapper(func): @@ -82,10 +90,14 @@ class Measure(object): self.start_usage = self.start_context.get_resource_usage() + in_flight.register((self.name,), self._update_in_flight) + def __exit__(self, exc_type, exc_val, exc_tb): if isinstance(exc_type, Exception) or not self.start_context: return + in_flight.unregister((self.name,), self._update_in_flight) + duration = self.clock.time() - self.start block_counter.labels(self.name).inc() @@ -120,3 +132,13 @@ class Measure(object): if self.created_context: self.start_context.__exit__(exc_type, exc_val, exc_tb) + + def _update_in_flight(self, metrics): + """Gets called when processing in flight metrics + """ + duration = self.clock.time() - self.start + + metrics.real_time_max = max(metrics.real_time_max, duration) + metrics.real_time_sum += duration + + # TODO: Add other in flight metrics. |