diff options
Diffstat (limited to 'synapse')
24 files changed, 184 insertions, 366 deletions
diff --git a/synapse/api/urls.py b/synapse/api/urls.py index 6d9f1ca0ef..f78695b657 100644 --- a/synapse/api/urls.py +++ b/synapse/api/urls.py @@ -28,7 +28,6 @@ FEDERATION_PREFIX = "/_matrix/federation/v1" STATIC_PREFIX = "/_matrix/static" WEB_CLIENT_PREFIX = "/_matrix/client" CONTENT_REPO_PREFIX = "/_matrix/content" -SERVER_KEY_PREFIX = "/_matrix/key/v1" SERVER_KEY_V2_PREFIX = "/_matrix/key/v2" MEDIA_PREFIX = "/_matrix/media/r0" LEGACY_MEDIA_PREFIX = "/_matrix/media/v1" diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 593e1e75db..415374a2ce 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -37,7 +37,6 @@ from synapse.api.urls import ( FEDERATION_PREFIX, LEGACY_MEDIA_PREFIX, MEDIA_PREFIX, - SERVER_KEY_PREFIX, SERVER_KEY_V2_PREFIX, STATIC_PREFIX, WEB_CLIENT_PREFIX, @@ -59,7 +58,6 @@ from synapse.python_dependencies import CONDITIONAL_REQUIREMENTS, check_requirem from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory from synapse.rest import ClientRestResource -from synapse.rest.key.v1.server_key_resource import LocalKey from synapse.rest.key.v2 import KeyApiV2Resource from synapse.rest.media.v0.content_repository import ContentRepoResource from synapse.server import HomeServer @@ -236,10 +234,7 @@ class SynapseHomeServer(HomeServer): ) if name in ["keys", "federation"]: - resources.update({ - SERVER_KEY_PREFIX: LocalKey(self), - SERVER_KEY_V2_PREFIX: KeyApiV2Resource(self), - }) + resources[SERVER_KEY_V2_PREFIX] = KeyApiV2Resource(self) if name == "webclient": resources[WEB_CLIENT_PREFIX] = build_resource_for_web_client(self) diff --git a/synapse/crypto/keyclient.py b/synapse/crypto/keyclient.py index 080c81f14b..d40e4b8591 100644 --- a/synapse/crypto/keyclient.py +++ b/synapse/crypto/keyclient.py @@ -15,6 +15,8 @@ import logging +from six.moves import urllib + from canonicaljson import json from twisted.internet import defer, reactor @@ -28,15 +30,15 @@ from synapse.util import logcontext logger = logging.getLogger(__name__) -KEY_API_V1 = b"/_matrix/key/v1/" +KEY_API_V2 = "/_matrix/key/v2/server/%s" @defer.inlineCallbacks -def fetch_server_key(server_name, tls_client_options_factory, path=KEY_API_V1): +def fetch_server_key(server_name, tls_client_options_factory, key_id): """Fetch the keys for a remote server.""" factory = SynapseKeyClientFactory() - factory.path = path + factory.path = KEY_API_V2 % (urllib.parse.quote(key_id), ) factory.host = server_name endpoint = matrix_federation_endpoint( reactor, server_name, tls_client_options_factory, timeout=30 diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index d89f94c219..515ebbc148 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd -# Copyright 2017 New Vector Ltd. +# Copyright 2017, 2018 New Vector Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,8 +18,6 @@ import hashlib import logging from collections import namedtuple -from six.moves import urllib - from signedjson.key import ( decode_verify_key_bytes, encode_verify_key_base64, @@ -395,32 +393,13 @@ class Keyring(object): @defer.inlineCallbacks def get_keys_from_server(self, server_name_and_key_ids): - @defer.inlineCallbacks - def get_key(server_name, key_ids): - keys = None - try: - keys = yield self.get_server_verify_key_v2_direct( - server_name, key_ids - ) - except Exception as e: - logger.info( - "Unable to get key %r for %r directly: %s %s", - key_ids, server_name, - type(e).__name__, str(e), - ) - - if not keys: - keys = yield self.get_server_verify_key_v1_direct( - server_name, key_ids - ) - - keys = {server_name: keys} - - defer.returnValue(keys) - results = yield logcontext.make_deferred_yieldable(defer.gatherResults( [ - run_in_background(get_key, server_name, key_ids) + run_in_background( + self.get_server_verify_key_v2_direct, + server_name, + key_ids, + ) for server_name, key_ids in server_name_and_key_ids ], consumeErrors=True, @@ -525,10 +504,7 @@ class Keyring(object): continue (response, tls_certificate) = yield fetch_server_key( - server_name, self.hs.tls_client_options_factory, - path=("/_matrix/key/v2/server/%s" % ( - urllib.parse.quote(requested_key_id), - )).encode("ascii"), + server_name, self.hs.tls_client_options_factory, requested_key_id ) if (u"signatures" not in response @@ -657,78 +633,6 @@ class Keyring(object): defer.returnValue(results) - @defer.inlineCallbacks - def get_server_verify_key_v1_direct(self, server_name, key_ids): - """Finds a verification key for the server with one of the key ids. - Args: - server_name (str): The name of the server to fetch a key for. - keys_ids (list of str): The key_ids to check for. - """ - - # Try to fetch the key from the remote server. - - (response, tls_certificate) = yield fetch_server_key( - server_name, self.hs.tls_client_options_factory - ) - - # Check the response. - - x509_certificate_bytes = crypto.dump_certificate( - crypto.FILETYPE_ASN1, tls_certificate - ) - - if ("signatures" not in response - or server_name not in response["signatures"]): - raise KeyLookupError("Key response not signed by remote server") - - if "tls_certificate" not in response: - raise KeyLookupError("Key response missing TLS certificate") - - tls_certificate_b64 = response["tls_certificate"] - - if encode_base64(x509_certificate_bytes) != tls_certificate_b64: - raise KeyLookupError("TLS certificate doesn't match") - - # Cache the result in the datastore. - - time_now_ms = self.clock.time_msec() - - verify_keys = {} - for key_id, key_base64 in response["verify_keys"].items(): - if is_signing_algorithm_supported(key_id): - key_bytes = decode_base64(key_base64) - verify_key = decode_verify_key_bytes(key_id, key_bytes) - verify_key.time_added = time_now_ms - verify_keys[key_id] = verify_key - - for key_id in response["signatures"][server_name]: - if key_id not in response["verify_keys"]: - raise KeyLookupError( - "Key response must include verification keys for all" - " signatures" - ) - if key_id in verify_keys: - verify_signed_json( - response, - server_name, - verify_keys[key_id] - ) - - yield self.store.store_server_certificate( - server_name, - server_name, - time_now_ms, - tls_certificate, - ) - - yield self.store_keys( - server_name=server_name, - from_server=server_name, - verify_keys=verify_keys, - ) - - defer.returnValue(verify_keys) - def store_keys(self, server_name, from_server, verify_keys): """Store a collection of verify keys for a given server Args: diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 0f9302a6a8..fa2cc550e2 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -324,11 +324,6 @@ class FederationServer(FederationBase): defer.returnValue((404, "")) @defer.inlineCallbacks - @log_function - def on_pull_request(self, origin, versions): - raise NotImplementedError("Pull transactions not implemented") - - @defer.inlineCallbacks def on_query_request(self, query_type, args): received_queries_counter.labels(query_type).inc() resp = yield self.registry.on_query(query_type, args) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 7288d49074..3553f418f1 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -362,14 +362,6 @@ class FederationSendServlet(BaseFederationServlet): defer.returnValue((code, response)) -class FederationPullServlet(BaseFederationServlet): - PATH = "/pull/" - - # This is for when someone asks us for everything since version X - def on_GET(self, origin, content, query): - return self.handler.on_pull_request(query["origin"][0], query["v"]) - - class FederationEventServlet(BaseFederationServlet): PATH = "/event/(?P<event_id>[^/]*)/" @@ -1261,7 +1253,6 @@ class FederationGroupsSettingJoinPolicyServlet(BaseFederationServlet): FEDERATION_SERVLET_CLASSES = ( FederationSendServlet, - FederationPullServlet, FederationEventServlet, FederationStateServlet, FederationStateIdsServlet, diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 969e588e73..a7cd779b02 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -427,6 +427,9 @@ class EventCreationHandler(object): if event.is_state(): prev_state = yield self.deduplicate_state_event(event, context) + logger.info( + "Not bothering to persist duplicate state event %s", event.event_id, + ) if prev_state is not None: defer.returnValue(prev_state) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 7b4549223f..d2beb275cf 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -256,9 +256,6 @@ class RegistrationHandler(BaseHandler): except Exception as e: logger.error("Failed to join new user to %r: %r", r, e) - # We used to generate default identicons here, but nowadays - # we want clients to generate their own as part of their branding - # rather than there being consistent matrix-wide ones, so we don't. defer.returnValue((user_id, token)) @defer.inlineCallbacks diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 9ff4656717..3928faa6e7 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -104,6 +104,8 @@ class RoomCreationHandler(BaseHandler): creator_id=user_id, is_public=r["is_public"], ) + logger.info("Creating new room %s to replace %s", new_room_id, old_room_id) + # we create and auth the tombstone event before properly creating the new # room, to check our user has perms in the old room. tombstone_event, tombstone_context = ( @@ -626,6 +628,7 @@ class RoomCreationHandler(BaseHandler): @defer.inlineCallbacks def send(etype, content, **kwargs): event = create(etype, content, **kwargs) + logger.info("Sending %s in new room", etype) yield self.event_creation_handler.create_and_send_nonmember_event( creator, event, @@ -648,6 +651,7 @@ class RoomCreationHandler(BaseHandler): content=creation_content, ) + logger.info("Sending %s in new room", EventTypes.Member) yield self.room_member_handler.update_membership( creator, creator.user, diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 0c1d52fd11..80e7b15de8 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -24,6 +24,7 @@ from synapse.api.constants import EventTypes, Membership from synapse.api.errors import SynapseError from synapse.api.filtering import Filter from synapse.events.utils import serialize_event +from synapse.storage.state import StateFilter from synapse.visibility import filter_events_for_client from ._base import BaseHandler @@ -324,9 +325,12 @@ class SearchHandler(BaseHandler): else: last_event_id = event.event_id + state_filter = StateFilter.from_types( + [(EventTypes.Member, sender) for sender in senders] + ) + state = yield self.store.get_state_for_event( - last_event_id, - types=[(EventTypes.Member, sender) for sender in senders] + last_event_id, state_filter ) res["profile_info"] = { diff --git a/synapse/push/emailpusher.py b/synapse/push/emailpusher.py index f369124258..50e1007d84 100644 --- a/synapse/push/emailpusher.py +++ b/synapse/push/emailpusher.py @@ -85,7 +85,10 @@ class EmailPusher(object): self.timed_call = None def on_new_notifications(self, min_stream_ordering, max_stream_ordering): - self.max_stream_ordering = max(max_stream_ordering, self.max_stream_ordering) + if self.max_stream_ordering: + self.max_stream_ordering = max(max_stream_ordering, self.max_stream_ordering) + else: + self.max_stream_ordering = max_stream_ordering self._start_processing() def on_new_receipts(self, min_stream_id, max_stream_id): diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index 16fb5e8471..ebcb93bfc7 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -26,7 +26,6 @@ import bleach import jinja2 from twisted.internet import defer -from twisted.mail.smtp import sendmail from synapse.api.constants import EventTypes from synapse.api.errors import StoreError @@ -85,6 +84,7 @@ class Mailer(object): self.notif_template_html = notif_template_html self.notif_template_text = notif_template_text + self.sendmail = self.hs.get_sendmail() self.store = self.hs.get_datastore() self.macaroon_gen = self.hs.get_macaroon_generator() self.state_handler = self.hs.get_state_handler() @@ -191,11 +191,11 @@ class Mailer(object): multipart_msg.attach(html_part) logger.info("Sending email push notification to %s" % email_address) - # logger.debug(html_text) - yield sendmail( + yield self.sendmail( self.hs.config.email_smtp_host, - raw_from, raw_to, multipart_msg.as_string(), + raw_from, raw_to, multipart_msg.as_string().encode('utf8'), + reactor=self.hs.get_reactor(), port=self.hs.config.email_smtp_port, requireAuthentication=self.hs.config.email_smtp_user is not None, username=self.hs.config.email_smtp_user, @@ -333,7 +333,7 @@ class Mailer(object): notif_events, user_id, reason): if len(notifs_by_room) == 1: # Only one room has new stuff - room_id = notifs_by_room.keys()[0] + room_id = list(notifs_by_room.keys())[0] # If the room has some kind of name, use it, but we don't # want the generated-from-names one here otherwise we'll diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 943876456b..ca62ee7637 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -51,7 +51,6 @@ REQUIREMENTS = { "daemonize>=2.3.1": ["daemonize"], "bcrypt>=3.1.0": ["bcrypt>=3.1.0"], "pillow>=3.1.2": ["PIL"], - "pydenticon>=0.2": ["pydenticon"], "sortedcontainers>=1.4.4": ["sortedcontainers"], "psutil>=2.0.0": ["psutil>=2.0.0"], "pysaml2>=3.0.0": ["saml2"], diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 5dc7b3fffc..0b3fe6cbf5 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -656,7 +656,7 @@ tcp_inbound_commands = LaterGauge( "", ["command", "name"], lambda: { - (k[0], p.name,): count + (k, p.name,): count for p in connected_connections for k, count in iteritems(p.inbound_commands_counter) }, @@ -667,7 +667,7 @@ tcp_outbound_commands = LaterGauge( "", ["command", "name"], lambda: { - (k[0], p.name,): count + (k, p.name,): count for p in connected_connections for k, count in iteritems(p.outbound_commands_counter) }, diff --git a/synapse/rest/key/v1/__init__.py b/synapse/rest/key/v1/__init__.py deleted file mode 100644 index fe0ac3f8e9..0000000000 --- a/synapse/rest/key/v1/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2015, 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/synapse/rest/key/v1/server_key_resource.py b/synapse/rest/key/v1/server_key_resource.py deleted file mode 100644 index 38eb2ee23f..0000000000 --- a/synapse/rest/key/v1/server_key_resource.py +++ /dev/null @@ -1,92 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2014-2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import logging - -from canonicaljson import encode_canonical_json -from signedjson.sign import sign_json -from unpaddedbase64 import encode_base64 - -from OpenSSL import crypto -from twisted.web.resource import Resource - -from synapse.http.server import respond_with_json_bytes - -logger = logging.getLogger(__name__) - - -class LocalKey(Resource): - """HTTP resource containing encoding the TLS X.509 certificate and NACL - signature verification keys for this server:: - - GET /key HTTP/1.1 - - HTTP/1.1 200 OK - Content-Type: application/json - { - "server_name": "this.server.example.com" - "verify_keys": { - "algorithm:version": # base64 encoded NACL verification key. - }, - "tls_certificate": # base64 ASN.1 DER encoded X.509 tls cert. - "signatures": { - "this.server.example.com": { - "algorithm:version": # NACL signature for this server. - } - } - } - """ - - def __init__(self, hs): - self.response_body = encode_canonical_json( - self.response_json_object(hs.config) - ) - Resource.__init__(self) - - @staticmethod - def response_json_object(server_config): - verify_keys = {} - for key in server_config.signing_key: - verify_key_bytes = key.verify_key.encode() - key_id = "%s:%s" % (key.alg, key.version) - verify_keys[key_id] = encode_base64(verify_key_bytes) - - x509_certificate_bytes = crypto.dump_certificate( - crypto.FILETYPE_ASN1, - server_config.tls_certificate - ) - json_object = { - u"server_name": server_config.server_name, - u"verify_keys": verify_keys, - u"tls_certificate": encode_base64(x509_certificate_bytes) - } - for key in server_config.signing_key: - json_object = sign_json( - json_object, - server_config.server_name, - key, - ) - - return json_object - - def render_GET(self, request): - return respond_with_json_bytes( - request, 200, self.response_body, - ) - - def getChild(self, name, request): - if name == b'': - return self diff --git a/synapse/rest/media/v1/identicon_resource.py b/synapse/rest/media/v1/identicon_resource.py deleted file mode 100644 index bdbd8d50dd..0000000000 --- a/synapse/rest/media/v1/identicon_resource.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2015, 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from pydenticon import Generator - -from twisted.web.resource import Resource - -from synapse.http.servlet import parse_integer - -FOREGROUND = [ - "rgb(45,79,255)", - "rgb(254,180,44)", - "rgb(226,121,234)", - "rgb(30,179,253)", - "rgb(232,77,65)", - "rgb(49,203,115)", - "rgb(141,69,170)" -] - -BACKGROUND = "rgb(224,224,224)" -SIZE = 5 - - -class IdenticonResource(Resource): - isLeaf = True - - def __init__(self): - Resource.__init__(self) - self.generator = Generator( - SIZE, SIZE, foreground=FOREGROUND, background=BACKGROUND, - ) - - def generate_identicon(self, name, width, height): - v_padding = width % SIZE - h_padding = height % SIZE - top_padding = v_padding // 2 - left_padding = h_padding // 2 - bottom_padding = v_padding - top_padding - right_padding = h_padding - left_padding - width -= v_padding - height -= h_padding - padding = (top_padding, bottom_padding, left_padding, right_padding) - identicon = self.generator.generate( - name, width, height, padding=padding - ) - return identicon - - def render_GET(self, request): - name = "/".join(request.postpath) - width = parse_integer(request, "width", default=96) - height = parse_integer(request, "height", default=96) - identicon_bytes = self.generate_identicon(name, width, height) - request.setHeader(b"Content-Type", b"image/png") - request.setHeader( - b"Cache-Control", b"public,max-age=86400,s-maxage=86400" - ) - return identicon_bytes diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 08b1867fab..d6c5f07af0 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -45,7 +45,6 @@ from ._base import FileInfo, respond_404, respond_with_responder from .config_resource import MediaConfigResource from .download_resource import DownloadResource from .filepath import MediaFilePaths -from .identicon_resource import IdenticonResource from .media_storage import MediaStorage from .preview_url_resource import PreviewUrlResource from .storage_provider import StorageProviderWrapper @@ -769,7 +768,6 @@ class MediaRepositoryResource(Resource): self.putChild(b"thumbnail", ThumbnailResource( hs, media_repo, media_repo.media_storage, )) - self.putChild(b"identicon", IdenticonResource()) if hs.config.url_preview_enabled: self.putChild(b"preview_url", PreviewUrlResource( hs, media_repo, media_repo.media_storage, diff --git a/synapse/server.py b/synapse/server.py index cf6b872cbd..9985687b95 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -23,6 +23,7 @@ import abc import logging from twisted.enterprise import adbapi +from twisted.mail.smtp import sendmail from twisted.web.client import BrowserLikePolicyForHTTPS from synapse.api.auth import Auth @@ -174,6 +175,7 @@ class HomeServer(object): 'message_handler', 'pagination_handler', 'room_context_handler', + 'sendmail', ] # This is overridden in derived application classes @@ -269,6 +271,9 @@ class HomeServer(object): def build_room_creation_handler(self): return RoomCreationHandler(self) + def build_sendmail(self): + return sendmail + def build_state_handler(self): return StateHandler(self) diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index d10ff9e4b9..62497ab63f 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -589,10 +589,14 @@ class DeviceStore(SQLBaseStore): combined list of changes to devices, and which destinations need to be poked. `destination` may be None if no destinations need to be poked. """ + # We do a group by here as there can be a large number of duplicate + # entries, since we throw away device IDs. sql = """ - SELECT stream_id, user_id, destination FROM device_lists_stream + SELECT MAX(stream_id) AS stream_id, user_id, destination + FROM device_lists_stream LEFT JOIN device_lists_outbound_pokes USING (stream_id, user_id, device_id) WHERE ? < stream_id AND stream_id <= ? + GROUP BY user_id, destination """ return self._execute( "get_all_device_list_changes_for_remotes", None, diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 8881b009df..919e855f3b 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -38,6 +38,7 @@ from synapse.state import StateResolutionStore from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.event_federation import EventFederationStore from synapse.storage.events_worker import EventsWorkerStore +from synapse.storage.state import StateGroupWorkerStore from synapse.types import RoomStreamToken, get_domain_from_id from synapse.util import batch_iter from synapse.util.async_helpers import ObservableDeferred @@ -205,7 +206,8 @@ def _retry_on_integrity_error(func): # inherits from EventFederationStore so that we can call _update_backward_extremities # and _handle_mult_prev_events (though arguably those could both be moved in here) -class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore): +class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore, + BackgroundUpdateStore): EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" @@ -2034,55 +2036,37 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore logger.info("[purge] finding redundant state groups") - # Get all state groups that are only referenced by events that are - # to be deleted. - # This works by first getting state groups that we may want to delete, - # joining against event_to_state_groups to get events that use that - # state group, then left joining against events_to_purge again. Any - # state group where the left join produce *no nulls* are referenced - # only by events that are going to be purged. + # Get all state groups that are referenced by events that are to be + # deleted. We then go and check if they are referenced by other events + # or state groups, and if not we delete them. txn.execute(""" - SELECT state_group FROM - ( - SELECT DISTINCT state_group FROM events_to_purge - INNER JOIN event_to_state_groups USING (event_id) - ) AS sp - INNER JOIN event_to_state_groups USING (state_group) - LEFT JOIN events_to_purge AS ep USING (event_id) - GROUP BY state_group - HAVING SUM(CASE WHEN ep.event_id IS NULL THEN 1 ELSE 0 END) = 0 + SELECT DISTINCT state_group FROM events_to_purge + INNER JOIN event_to_state_groups USING (event_id) """) - state_rows = txn.fetchall() - logger.info("[purge] found %i redundant state groups", len(state_rows)) - - # make a set of the redundant state groups, so that we can look them up - # efficiently - state_groups_to_delete = set([sg for sg, in state_rows]) - - # Now we get all the state groups that rely on these state groups - logger.info("[purge] finding state groups which depend on redundant" - " state groups") - remaining_state_groups = [] - for i in range(0, len(state_rows), 100): - chunk = [sg for sg, in state_rows[i:i + 100]] - # look for state groups whose prev_state_group is one we are about - # to delete - rows = self._simple_select_many_txn( - txn, - table="state_group_edges", - column="prev_state_group", - iterable=chunk, - retcols=["state_group"], - keyvalues={}, - ) - remaining_state_groups.extend( - row["state_group"] for row in rows + referenced_state_groups = set(sg for sg, in txn) + logger.info( + "[purge] found %i referenced state groups", + len(referenced_state_groups), + ) - # exclude state groups we are about to delete: no point in - # updating them - if row["state_group"] not in state_groups_to_delete + logger.info("[purge] finding state groups that can be deleted") + + state_groups_to_delete, remaining_state_groups = ( + self._find_unreferenced_groups_during_purge( + txn, referenced_state_groups, ) + ) + + logger.info( + "[purge] found %i state groups to delete", + len(state_groups_to_delete), + ) + + logger.info( + "[purge] de-delta-ing %i remaining state groups", + len(remaining_state_groups), + ) # Now we turn the state groups that reference to-be-deleted state # groups to non delta versions. @@ -2127,11 +2111,11 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore logger.info("[purge] removing redundant state groups") txn.executemany( "DELETE FROM state_groups_state WHERE state_group = ?", - state_rows + ((sg,) for sg in state_groups_to_delete), ) txn.executemany( "DELETE FROM state_groups WHERE id = ?", - state_rows + ((sg,) for sg in state_groups_to_delete), ) logger.info("[purge] removing events from event_to_state_groups") @@ -2227,6 +2211,85 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore logger.info("[purge] done") + def _find_unreferenced_groups_during_purge(self, txn, state_groups): + """Used when purging history to figure out which state groups can be + deleted and which need to be de-delta'ed (due to one of its prev groups + being scheduled for deletion). + + Args: + txn + state_groups (set[int]): Set of state groups referenced by events + that are going to be deleted. + + Returns: + tuple[set[int], set[int]]: The set of state groups that can be + deleted and the set of state groups that need to be de-delta'ed + """ + # Graph of state group -> previous group + graph = {} + + # Set of events that we have found to be referenced by events + referenced_groups = set() + + # Set of state groups we've already seen + state_groups_seen = set(state_groups) + + # Set of state groups to handle next. + next_to_search = set(state_groups) + while next_to_search: + # We bound size of groups we're looking up at once, to stop the + # SQL query getting too big + if len(next_to_search) < 100: + current_search = next_to_search + next_to_search = set() + else: + current_search = set(itertools.islice(next_to_search, 100)) + next_to_search -= current_search + + # Check if state groups are referenced + sql = """ + SELECT DISTINCT state_group FROM event_to_state_groups + LEFT JOIN events_to_purge AS ep USING (event_id) + WHERE state_group IN (%s) AND ep.event_id IS NULL + """ % (",".join("?" for _ in current_search),) + txn.execute(sql, list(current_search)) + + referenced = set(sg for sg, in txn) + referenced_groups |= referenced + + # We don't continue iterating up the state group graphs for state + # groups that are referenced. + current_search -= referenced + + rows = self._simple_select_many_txn( + txn, + table="state_group_edges", + column="prev_state_group", + iterable=current_search, + keyvalues={}, + retcols=("prev_state_group", "state_group",), + ) + + prevs = set(row["state_group"] for row in rows) + # We don't bother re-handling groups we've already seen + prevs -= state_groups_seen + next_to_search |= prevs + state_groups_seen |= prevs + + for row in rows: + # Note: Each state group can have at most one prev group + graph[row["state_group"]] = row["prev_state_group"] + + to_delete = state_groups_seen - referenced_groups + + to_dedelta = set() + for sg in referenced_groups: + prev_sg = graph.get(sg) + if prev_sg and prev_sg in to_delete: + to_dedelta.add(sg) + + return to_delete, to_dedelta + @defer.inlineCallbacks def is_event_after(self, event_id1, event_id2): """Returns True if event_id1 is after event_id2 in the stream diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index b364719312..bd740e1e45 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 51 +SCHEMA_VERSION = 52 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/52/add_event_to_state_group_index.sql b/synapse/storage/schema/delta/52/add_event_to_state_group_index.sql new file mode 100644 index 0000000000..91e03d13e1 --- /dev/null +++ b/synapse/storage/schema/delta/52/add_event_to_state_group_index.sql @@ -0,0 +1,19 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- This is needed to efficiently check for unreferenced state groups during +-- purge. Added events_to_state_group(state_group) index +INSERT into background_updates (update_name, progress_json) + VALUES ('event_to_state_groups_sg_index', '{}'); diff --git a/synapse/storage/state.py b/synapse/storage/state.py index ef65929bb2..d737bd6778 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -1257,6 +1257,7 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication" STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index" CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx" + EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index" def __init__(self, db_conn, hs): super(StateStore, self).__init__(db_conn, hs) @@ -1275,6 +1276,12 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): columns=["state_key"], where_clause="type='m.room.member'", ) + self.register_background_index_update( + self.EVENT_STATE_GROUP_INDEX_UPDATE_NAME, + index_name="event_to_state_groups_sg_index", + table="event_to_state_groups", + columns=["state_group"], + ) def _store_event_state_mappings_txn(self, txn, events_and_contexts): state_groups = {} |