diff --git a/synapse/replication/expire_cache.py b/synapse/replication/expire_cache.py
deleted file mode 100644
index c05a50d7a6..0000000000
--- a/synapse/replication/expire_cache.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from synapse.http.server import respond_with_json_bytes, request_handler
-from synapse.http.servlet import parse_json_object_from_request
-
-from twisted.web.resource import Resource
-from twisted.web.server import NOT_DONE_YET
-
-
-class ExpireCacheResource(Resource):
- """
- HTTP endpoint for expiring storage caches.
-
- POST /_synapse/replication/expire_cache HTTP/1.1
- Content-Type: application/json
-
- {
- "invalidate": [
- {
- "name": "func_name",
- "keys": ["key1", "key2"]
- }
- ]
- }
- """
-
- def __init__(self, hs):
- Resource.__init__(self) # Resource is old-style, so no super()
-
- self.store = hs.get_datastore()
- self.version_string = hs.version_string
- self.clock = hs.get_clock()
-
- def render_POST(self, request):
- self._async_render_POST(request)
- return NOT_DONE_YET
-
- @request_handler()
- def _async_render_POST(self, request):
- content = parse_json_object_from_request(request)
-
- for row in content["invalidate"]:
- name = row["name"]
- keys = tuple(row["keys"])
-
- getattr(self.store, name).invalidate(keys)
-
- respond_with_json_bytes(request, 200, "{}")
diff --git a/synapse/replication/presence_resource.py b/synapse/replication/presence_resource.py
deleted file mode 100644
index fc18130ab4..0000000000
--- a/synapse/replication/presence_resource.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from synapse.http.server import respond_with_json_bytes, request_handler
-from synapse.http.servlet import parse_json_object_from_request
-
-from twisted.web.resource import Resource
-from twisted.web.server import NOT_DONE_YET
-from twisted.internet import defer
-
-
-class PresenceResource(Resource):
- """
- HTTP endpoint for marking users as syncing.
-
- POST /_synapse/replication/presence HTTP/1.1
- Content-Type: application/json
-
- {
- "process_id": "<process_id>",
- "syncing_users": ["<user_id>"]
- }
- """
-
- def __init__(self, hs):
- Resource.__init__(self) # Resource is old-style, so no super()
-
- self.version_string = hs.version_string
- self.clock = hs.get_clock()
- self.presence_handler = hs.get_presence_handler()
-
- def render_POST(self, request):
- self._async_render_POST(request)
- return NOT_DONE_YET
-
- @request_handler()
- @defer.inlineCallbacks
- def _async_render_POST(self, request):
- content = parse_json_object_from_request(request)
-
- process_id = content["process_id"]
- syncing_user_ids = content["syncing_users"]
-
- yield self.presence_handler.update_external_syncs(
- process_id, set(syncing_user_ids)
- )
-
- respond_with_json_bytes(request, 200, "{}")
diff --git a/synapse/replication/pusher_resource.py b/synapse/replication/pusher_resource.py
deleted file mode 100644
index 9b01ab3c13..0000000000
--- a/synapse/replication/pusher_resource.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from synapse.http.server import respond_with_json_bytes, request_handler
-from synapse.http.servlet import parse_json_object_from_request
-
-from twisted.web.resource import Resource
-from twisted.web.server import NOT_DONE_YET
-from twisted.internet import defer
-
-
-class PusherResource(Resource):
- """
- HTTP endpoint for deleting rejected pushers
- """
-
- def __init__(self, hs):
- Resource.__init__(self) # Resource is old-style, so no super()
-
- self.version_string = hs.version_string
- self.store = hs.get_datastore()
- self.notifier = hs.get_notifier()
- self.clock = hs.get_clock()
-
- def render_POST(self, request):
- self._async_render_POST(request)
- return NOT_DONE_YET
-
- @request_handler()
- @defer.inlineCallbacks
- def _async_render_POST(self, request):
- content = parse_json_object_from_request(request)
-
- for remove in content["remove"]:
- yield self.store.delete_pusher_by_app_id_pushkey_user_id(
- remove["app_id"],
- remove["push_key"],
- remove["user_id"],
- )
-
- self.notifier.on_new_replication_data()
-
- respond_with_json_bytes(request, 200, "{}")
diff --git a/synapse/replication/resource.py b/synapse/replication/resource.py
deleted file mode 100644
index 03930fe958..0000000000
--- a/synapse/replication/resource.py
+++ /dev/null
@@ -1,576 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2015 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from synapse.http.servlet import parse_integer, parse_string
-from synapse.http.server import request_handler, finish_request
-from synapse.replication.pusher_resource import PusherResource
-from synapse.replication.presence_resource import PresenceResource
-from synapse.replication.expire_cache import ExpireCacheResource
-from synapse.api.errors import SynapseError
-
-from twisted.web.resource import Resource
-from twisted.web.server import NOT_DONE_YET
-from twisted.internet import defer
-
-import ujson as json
-
-import collections
-import logging
-
-logger = logging.getLogger(__name__)
-
-REPLICATION_PREFIX = "/_synapse/replication"
-
-STREAM_NAMES = (
- ("events",),
- ("presence",),
- ("typing",),
- ("receipts",),
- ("user_account_data", "room_account_data", "tag_account_data",),
- ("backfill",),
- ("push_rules",),
- ("pushers",),
- ("caches",),
- ("to_device",),
- ("public_rooms",),
- ("federation",),
- ("device_lists",),
-)
-
-
-class ReplicationResource(Resource):
- """
- HTTP endpoint for extracting data from synapse.
-
- The streams of data returned by the endpoint are controlled by the
- parameters given to the API. To return a given stream pass a query
- parameter with a position in the stream to return data from or the
- special value "-1" to return data from the start of the stream.
-
- If there is no data for any of the supplied streams after the given
- position then the request will block until there is data for one
- of the streams. This allows clients to long-poll this API.
-
- The possible streams are:
-
- * "streams": A special stream returing the positions of other streams.
- * "events": The new events seen on the server.
- * "presence": Presence updates.
- * "typing": Typing updates.
- * "receipts": Receipt updates.
- * "user_account_data": Top-level per user account data.
- * "room_account_data: Per room per user account data.
- * "tag_account_data": Per room per user tags.
- * "backfill": Old events that have been backfilled from other servers.
- * "push_rules": Per user changes to push rules.
- * "pushers": Per user changes to their pushers.
- * "caches": Cache invalidations.
-
- The API takes two additional query parameters:
-
- * "timeout": How long to wait before returning an empty response.
- * "limit": The maximum number of rows to return for the selected streams.
-
- The response is a JSON object with keys for each stream with updates. Under
- each key is a JSON object with:
-
- * "position": The current position of the stream.
- * "field_names": The names of the fields in each row.
- * "rows": The updates as an array of arrays.
-
- There are a number of ways this API could be used:
-
- 1) To replicate the contents of the backing database to another database.
- 2) To be notified when the contents of a shared backing database changes.
- 3) To "tail" the activity happening on a server for debugging.
-
- In the first case the client would track all of the streams and store it's
- own copy of the data.
-
- In the second case the client might theoretically just be able to follow
- the "streams" stream to track where the other streams are. However in
- practise it will probably need to get the contents of the streams in
- order to expire the any in-memory caches. Whether it gets the contents
- of the streams from this replication API or directly from the backing
- store is a matter of taste.
-
- In the third case the client would use the "streams" stream to find what
- streams are available and their current positions. Then it can start
- long-polling this replication API for new data on those streams.
- """
-
- def __init__(self, hs):
- Resource.__init__(self) # Resource is old-style, so no super()
-
- self.version_string = hs.version_string
- self.store = hs.get_datastore()
- self.sources = hs.get_event_sources()
- self.presence_handler = hs.get_presence_handler()
- self.typing_handler = hs.get_typing_handler()
- self.federation_sender = hs.get_federation_sender()
- self.notifier = hs.notifier
- self.clock = hs.get_clock()
- self.config = hs.get_config()
-
- self.putChild("remove_pushers", PusherResource(hs))
- self.putChild("syncing_users", PresenceResource(hs))
- self.putChild("expire_cache", ExpireCacheResource(hs))
-
- def render_GET(self, request):
- self._async_render_GET(request)
- return NOT_DONE_YET
-
- @defer.inlineCallbacks
- def current_replication_token(self):
- stream_token = yield self.sources.get_current_token()
- backfill_token = yield self.store.get_current_backfill_token()
- push_rules_token, room_stream_token = self.store.get_push_rules_stream_token()
- pushers_token = self.store.get_pushers_stream_token()
- caches_token = self.store.get_cache_stream_token()
- public_rooms_token = self.store.get_current_public_room_stream_id()
- federation_token = self.federation_sender.get_current_token()
- device_list_token = self.store.get_device_stream_token()
-
- defer.returnValue(_ReplicationToken(
- room_stream_token,
- int(stream_token.presence_key),
- int(stream_token.typing_key),
- int(stream_token.receipt_key),
- int(stream_token.account_data_key),
- backfill_token,
- push_rules_token,
- pushers_token,
- 0, # State stream is no longer a thing
- caches_token,
- int(stream_token.to_device_key),
- int(public_rooms_token),
- int(federation_token),
- int(device_list_token),
- ))
-
- @request_handler()
- @defer.inlineCallbacks
- def _async_render_GET(self, request):
- limit = parse_integer(request, "limit", 100)
- timeout = parse_integer(request, "timeout", 10 * 1000)
-
- request.setHeader(b"Content-Type", b"application/json")
-
- request_streams = {
- name: parse_integer(request, name)
- for names in STREAM_NAMES for name in names
- }
- request_streams["streams"] = parse_string(request, "streams")
-
- federation_ack = parse_integer(request, "federation_ack", None)
-
- def replicate():
- return self.replicate(
- request_streams, limit,
- federation_ack=federation_ack
- )
-
- writer = yield self.notifier.wait_for_replication(replicate, timeout)
- result = writer.finish()
-
- for stream_name, stream_content in result.items():
- logger.info(
- "Replicating %d rows of %s from %s -> %s",
- len(stream_content["rows"]),
- stream_name,
- request_streams.get(stream_name),
- stream_content["position"],
- )
-
- request.write(json.dumps(result, ensure_ascii=False))
- finish_request(request)
-
- @defer.inlineCallbacks
- def replicate(self, request_streams, limit, federation_ack=None):
- writer = _Writer()
- current_token = yield self.current_replication_token()
- logger.debug("Replicating up to %r", current_token)
-
- if limit == 0:
- raise SynapseError(400, "Limit cannot be 0")
-
- yield self.account_data(writer, current_token, limit, request_streams)
- yield self.events(writer, current_token, limit, request_streams)
- # TODO: implement limit
- yield self.presence(writer, current_token, request_streams)
- yield self.typing(writer, current_token, request_streams)
- yield self.receipts(writer, current_token, limit, request_streams)
- yield self.push_rules(writer, current_token, limit, request_streams)
- yield self.pushers(writer, current_token, limit, request_streams)
- yield self.caches(writer, current_token, limit, request_streams)
- yield self.to_device(writer, current_token, limit, request_streams)
- yield self.public_rooms(writer, current_token, limit, request_streams)
- yield self.device_lists(writer, current_token, limit, request_streams)
- self.federation(writer, current_token, limit, request_streams, federation_ack)
- self.streams(writer, current_token, request_streams)
-
- logger.debug("Replicated %d rows", writer.total)
- defer.returnValue(writer)
-
- def streams(self, writer, current_token, request_streams):
- request_token = request_streams.get("streams")
-
- streams = []
-
- if request_token is not None:
- if request_token == "-1":
- for names, position in zip(STREAM_NAMES, current_token):
- streams.extend((name, position) for name in names)
- else:
- items = zip(
- STREAM_NAMES,
- current_token,
- _ReplicationToken(request_token)
- )
- for names, current_id, last_id in items:
- if last_id < current_id:
- streams.extend((name, current_id) for name in names)
-
- if streams:
- writer.write_header_and_rows(
- "streams", streams, ("name", "position"),
- position=str(current_token)
- )
-
- @defer.inlineCallbacks
- def events(self, writer, current_token, limit, request_streams):
- request_events = request_streams.get("events")
- request_backfill = request_streams.get("backfill")
-
- if request_events is not None or request_backfill is not None:
- if request_events is None:
- request_events = current_token.events
- if request_backfill is None:
- request_backfill = current_token.backfill
-
- no_new_tokens = (
- request_events == current_token.events
- and request_backfill == current_token.backfill
- )
- if no_new_tokens:
- return
-
- res = yield self.store.get_all_new_events(
- request_backfill, request_events,
- current_token.backfill, current_token.events,
- limit
- )
-
- upto_events_token = _position_from_rows(
- res.new_forward_events, current_token.events
- )
-
- upto_backfill_token = _position_from_rows(
- res.new_backfill_events, current_token.backfill
- )
-
- if request_events != upto_events_token:
- writer.write_header_and_rows("events", res.new_forward_events, (
- "position", "event_id", "room_id", "type", "state_key",
- ), position=upto_events_token)
-
- if request_backfill != upto_backfill_token:
- writer.write_header_and_rows("backfill", res.new_backfill_events, (
- "position", "event_id", "room_id", "type", "state_key", "redacts",
- ), position=upto_backfill_token)
-
- writer.write_header_and_rows(
- "forward_ex_outliers", res.forward_ex_outliers,
- ("position", "event_id", "state_group"),
- )
- writer.write_header_and_rows(
- "backward_ex_outliers", res.backward_ex_outliers,
- ("position", "event_id", "state_group"),
- )
-
- @defer.inlineCallbacks
- def presence(self, writer, current_token, request_streams):
- current_position = current_token.presence
-
- request_presence = request_streams.get("presence")
-
- if request_presence is not None and request_presence != current_position:
- presence_rows = yield self.presence_handler.get_all_presence_updates(
- request_presence, current_position
- )
- upto_token = _position_from_rows(presence_rows, current_position)
- writer.write_header_and_rows("presence", presence_rows, (
- "position", "user_id", "state", "last_active_ts",
- "last_federation_update_ts", "last_user_sync_ts",
- "status_msg", "currently_active",
- ), position=upto_token)
-
- @defer.inlineCallbacks
- def typing(self, writer, current_token, request_streams):
- current_position = current_token.typing
-
- request_typing = request_streams.get("typing")
-
- if request_typing is not None and request_typing != current_position:
- # If they have a higher token than current max, we can assume that
- # they had been talking to a previous instance of the master. Since
- # we reset the token on restart, the best (but hacky) thing we can
- # do is to simply resend down all the typing notifications.
- if request_typing > current_position:
- request_typing = 0
-
- typing_rows = yield self.typing_handler.get_all_typing_updates(
- request_typing, current_position
- )
- upto_token = _position_from_rows(typing_rows, current_position)
- writer.write_header_and_rows("typing", typing_rows, (
- "position", "room_id", "typing"
- ), position=upto_token)
-
- @defer.inlineCallbacks
- def receipts(self, writer, current_token, limit, request_streams):
- current_position = current_token.receipts
-
- request_receipts = request_streams.get("receipts")
-
- if request_receipts is not None and request_receipts != current_position:
- receipts_rows = yield self.store.get_all_updated_receipts(
- request_receipts, current_position, limit
- )
- upto_token = _position_from_rows(receipts_rows, current_position)
- writer.write_header_and_rows("receipts", receipts_rows, (
- "position", "room_id", "receipt_type", "user_id", "event_id", "data"
- ), position=upto_token)
-
- @defer.inlineCallbacks
- def account_data(self, writer, current_token, limit, request_streams):
- current_position = current_token.account_data
-
- user_account_data = request_streams.get("user_account_data")
- room_account_data = request_streams.get("room_account_data")
- tag_account_data = request_streams.get("tag_account_data")
-
- if user_account_data is not None or room_account_data is not None:
- if user_account_data is None:
- user_account_data = current_position
- if room_account_data is None:
- room_account_data = current_position
-
- no_new_tokens = (
- user_account_data == current_position
- and room_account_data == current_position
- )
- if no_new_tokens:
- return
-
- user_rows, room_rows = yield self.store.get_all_updated_account_data(
- user_account_data, room_account_data, current_position, limit
- )
-
- upto_users_token = _position_from_rows(user_rows, current_position)
- upto_rooms_token = _position_from_rows(room_rows, current_position)
-
- writer.write_header_and_rows("user_account_data", user_rows, (
- "position", "user_id", "type", "content"
- ), position=upto_users_token)
- writer.write_header_and_rows("room_account_data", room_rows, (
- "position", "user_id", "room_id", "type", "content"
- ), position=upto_rooms_token)
-
- if tag_account_data is not None:
- tag_rows = yield self.store.get_all_updated_tags(
- tag_account_data, current_position, limit
- )
- upto_tag_token = _position_from_rows(tag_rows, current_position)
- writer.write_header_and_rows("tag_account_data", tag_rows, (
- "position", "user_id", "room_id", "tags"
- ), position=upto_tag_token)
-
- @defer.inlineCallbacks
- def push_rules(self, writer, current_token, limit, request_streams):
- current_position = current_token.push_rules
-
- push_rules = request_streams.get("push_rules")
-
- if push_rules is not None and push_rules != current_position:
- rows = yield self.store.get_all_push_rule_updates(
- push_rules, current_position, limit
- )
- upto_token = _position_from_rows(rows, current_position)
- writer.write_header_and_rows("push_rules", rows, (
- "position", "event_stream_ordering", "user_id", "rule_id", "op",
- "priority_class", "priority", "conditions", "actions"
- ), position=upto_token)
-
- @defer.inlineCallbacks
- def pushers(self, writer, current_token, limit, request_streams):
- current_position = current_token.pushers
-
- pushers = request_streams.get("pushers")
-
- if pushers is not None and pushers != current_position:
- updated, deleted = yield self.store.get_all_updated_pushers(
- pushers, current_position, limit
- )
- upto_token = _position_from_rows(updated, current_position)
- writer.write_header_and_rows("pushers", updated, (
- "position", "user_id", "access_token", "profile_tag", "kind",
- "app_id", "app_display_name", "device_display_name", "pushkey",
- "ts", "lang", "data"
- ), position=upto_token)
- writer.write_header_and_rows("deleted_pushers", deleted, (
- "position", "user_id", "app_id", "pushkey"
- ), position=upto_token)
-
- @defer.inlineCallbacks
- def caches(self, writer, current_token, limit, request_streams):
- current_position = current_token.caches
-
- caches = request_streams.get("caches")
-
- if caches is not None and caches != current_position:
- updated_caches = yield self.store.get_all_updated_caches(
- caches, current_position, limit
- )
- upto_token = _position_from_rows(updated_caches, current_position)
- writer.write_header_and_rows("caches", updated_caches, (
- "position", "cache_func", "keys", "invalidation_ts"
- ), position=upto_token)
-
- @defer.inlineCallbacks
- def to_device(self, writer, current_token, limit, request_streams):
- current_position = current_token.to_device
-
- to_device = request_streams.get("to_device")
-
- if to_device is not None and to_device != current_position:
- to_device_rows = yield self.store.get_all_new_device_messages(
- to_device, current_position, limit
- )
- upto_token = _position_from_rows(to_device_rows, current_position)
- writer.write_header_and_rows("to_device", to_device_rows, (
- "position", "user_id", "device_id", "message_json"
- ), position=upto_token)
-
- @defer.inlineCallbacks
- def public_rooms(self, writer, current_token, limit, request_streams):
- current_position = current_token.public_rooms
-
- public_rooms = request_streams.get("public_rooms")
-
- if public_rooms is not None and public_rooms != current_position:
- public_rooms_rows = yield self.store.get_all_new_public_rooms(
- public_rooms, current_position, limit
- )
- upto_token = _position_from_rows(public_rooms_rows, current_position)
- writer.write_header_and_rows("public_rooms", public_rooms_rows, (
- "position", "room_id", "visibility", "appservice_id", "network_id",
- ), position=upto_token)
-
- def federation(self, writer, current_token, limit, request_streams, federation_ack):
- if self.config.send_federation:
- return
-
- current_position = current_token.federation
-
- federation = request_streams.get("federation")
-
- if federation is not None and federation != current_position:
- federation_rows = self.federation_sender.get_replication_rows(
- federation, limit, federation_ack=federation_ack,
- )
- upto_token = _position_from_rows(federation_rows, current_position)
- writer.write_header_and_rows("federation", federation_rows, (
- "position", "type", "content",
- ), position=upto_token)
-
- @defer.inlineCallbacks
- def device_lists(self, writer, current_token, limit, request_streams):
- current_position = current_token.device_lists
-
- device_lists = request_streams.get("device_lists")
-
- if device_lists is not None and device_lists != current_position:
- changes = yield self.store.get_all_device_list_changes_for_remotes(
- device_lists,
- )
- writer.write_header_and_rows("device_lists", changes, (
- "position", "user_id", "destination",
- ), position=current_position)
-
-
-class _Writer(object):
- """Writes the streams as a JSON object as the response to the request"""
- def __init__(self):
- self.streams = {}
- self.total = 0
-
- def write_header_and_rows(self, name, rows, fields, position=None):
- if position is None:
- if rows:
- position = rows[-1][0]
- else:
- return
-
- self.streams[name] = {
- "position": position if type(position) is int else str(position),
- "field_names": fields,
- "rows": rows,
- }
-
- self.total += len(rows)
-
- def __nonzero__(self):
- return bool(self.total)
-
- def finish(self):
- return self.streams
-
-
-class _ReplicationToken(collections.namedtuple("_ReplicationToken", (
- "events", "presence", "typing", "receipts", "account_data", "backfill",
- "push_rules", "pushers", "state", "caches", "to_device", "public_rooms",
- "federation", "device_lists",
-))):
- __slots__ = []
-
- def __new__(cls, *args):
- if len(args) == 1:
- streams = [int(value) for value in args[0].split("_")]
- if len(streams) < len(cls._fields):
- streams.extend([0] * (len(cls._fields) - len(streams)))
- return cls(*streams)
- else:
- return super(_ReplicationToken, cls).__new__(cls, *args)
-
- def __str__(self):
- return "_".join(str(value) for value in self)
-
-
-def _position_from_rows(rows, current_position):
- """Calculates a position to return for a stream. Ideally we want to return the
- position of the last row, as that will be the most correct. However, if there
- are no rows we fall back to using the current position to stop us from
- repeatedly hitting the storage layer unncessarily thinking there are updates.
- (Not all advances of the token correspond to an actual update)
-
- We can't just always return the current position, as we often limit the
- number of rows we replicate, and so the stream may lag. The assumption is
- that if the storage layer returns no new rows then we are not lagging and
- we are at the `current_position`.
- """
- if rows:
- return rows[-1][0]
- return current_position
diff --git a/synapse/replication/slave/storage/_base.py b/synapse/replication/slave/storage/_base.py
index ab133db872..b962641166 100644
--- a/synapse/replication/slave/storage/_base.py
+++ b/synapse/replication/slave/storage/_base.py
@@ -15,7 +15,6 @@
from synapse.storage._base import SQLBaseStore
from synapse.storage.engines import PostgresEngine
-from twisted.internet import defer
from ._slaved_id_tracker import SlavedIdTracker
@@ -34,8 +33,7 @@ class BaseSlavedStore(SQLBaseStore):
else:
self._cache_id_gen = None
- self.expire_cache_url = hs.config.worker_replication_url + "/expire_cache"
- self.http_client = hs.get_simple_http_client()
+ self.hs = hs
def stream_positions(self):
pos = {}
@@ -43,35 +41,20 @@ class BaseSlavedStore(SQLBaseStore):
pos["caches"] = self._cache_id_gen.get_current_token()
return pos
- def process_replication(self, result):
- stream = result.get("caches")
- if stream:
- for row in stream["rows"]:
- (
- position, cache_func, keys, invalidation_ts,
- ) = row
-
+ def process_replication_rows(self, stream_name, token, rows):
+ if stream_name == "caches":
+ self._cache_id_gen.advance(token)
+ for row in rows:
try:
- getattr(self, cache_func).invalidate(tuple(keys))
+ getattr(self, row.cache_func).invalidate(tuple(row.keys))
except AttributeError:
# We probably haven't pulled in the cache in this worker,
# which is fine.
pass
- self._cache_id_gen.advance(int(stream["position"]))
- return defer.succeed(None)
def _invalidate_cache_and_stream(self, txn, cache_func, keys):
txn.call_after(cache_func.invalidate, keys)
txn.call_after(self._send_invalidation_poke, cache_func, keys)
- @defer.inlineCallbacks
def _send_invalidation_poke(self, cache_func, keys):
- try:
- yield self.http_client.post_json_get_json(self.expire_cache_url, {
- "invalidate": [{
- "name": cache_func.__name__,
- "keys": list(keys),
- }]
- })
- except:
- logger.exception("Failed to poke on expire_cache")
+ self.hs.get_tcp_replication().send_invalidate_cache(cache_func, keys)
diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py
index 77c64722c7..efbd87918e 100644
--- a/synapse/replication/slave/storage/account_data.py
+++ b/synapse/replication/slave/storage/account_data.py
@@ -69,38 +69,25 @@ class SlavedAccountDataStore(BaseSlavedStore):
result["tag_account_data"] = position
return result
- def process_replication(self, result):
- stream = result.get("user_account_data")
- if stream:
- self._account_data_id_gen.advance(int(stream["position"]))
- for row in stream["rows"]:
- position, user_id, data_type = row[:3]
- self.get_global_account_data_by_type_for_user.invalidate(
- (data_type, user_id,)
- )
- self.get_account_data_for_user.invalidate((user_id,))
+ def process_replication_rows(self, stream_name, token, rows):
+ if stream_name == "tag_account_data":
+ self._account_data_id_gen.advance(token)
+ for row in rows:
+ self.get_tags_for_user.invalidate((row.user_id,))
self._account_data_stream_cache.entity_has_changed(
- user_id, position
+ row.user_id, token
)
-
- stream = result.get("room_account_data")
- if stream:
- self._account_data_id_gen.advance(int(stream["position"]))
- for row in stream["rows"]:
- position, user_id = row[:2]
- self.get_account_data_for_user.invalidate((user_id,))
+ elif stream_name == "account_data":
+ self._account_data_id_gen.advance(token)
+ for row in rows:
+ if not row.room_id:
+ self.get_global_account_data_by_type_for_user.invalidate(
+ (row.data_type, row.user_id,)
+ )
+ self.get_account_data_for_user.invalidate((row.user_id,))
self._account_data_stream_cache.entity_has_changed(
- user_id, position
+ row.user_id, token
)
-
- stream = result.get("tag_account_data")
- if stream:
- self._account_data_id_gen.advance(int(stream["position"]))
- for row in stream["rows"]:
- position, user_id = row[:2]
- self.get_tags_for_user.invalidate((user_id,))
- self._account_data_stream_cache.entity_has_changed(
- user_id, position
- )
-
- return super(SlavedAccountDataStore, self).process_replication(result)
+ return super(SlavedAccountDataStore, self).process_replication_rows(
+ stream_name, token, rows
+ )
diff --git a/synapse/replication/slave/storage/deviceinbox.py b/synapse/replication/slave/storage/deviceinbox.py
index f9102e0d89..6f3fb64770 100644
--- a/synapse/replication/slave/storage/deviceinbox.py
+++ b/synapse/replication/slave/storage/deviceinbox.py
@@ -53,21 +53,18 @@ class SlavedDeviceInboxStore(BaseSlavedStore):
result["to_device"] = self._device_inbox_id_gen.get_current_token()
return result
- def process_replication(self, result):
- stream = result.get("to_device")
- if stream:
- self._device_inbox_id_gen.advance(int(stream["position"]))
- for row in stream["rows"]:
- stream_id = row[0]
- entity = row[1]
-
- if entity.startswith("@"):
+ def process_replication_rows(self, stream_name, token, rows):
+ if stream_name == "to_device":
+ self._device_inbox_id_gen.advance(token)
+ for row in rows:
+ if row.entity.startswith("@"):
self._device_inbox_stream_cache.entity_has_changed(
- entity, stream_id
+ row.entity, token
)
else:
self._device_federation_outbox_stream_cache.entity_has_changed(
- entity, stream_id
+ row.entity, token
)
-
- return super(SlavedDeviceInboxStore, self).process_replication(result)
+ return super(SlavedDeviceInboxStore, self).process_replication_rows(
+ stream_name, token, rows
+ )
diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py
index ca46aa17b6..4d4a435471 100644
--- a/synapse/replication/slave/storage/devices.py
+++ b/synapse/replication/slave/storage/devices.py
@@ -51,22 +51,18 @@ class SlavedDeviceStore(BaseSlavedStore):
result["device_lists"] = self._device_list_id_gen.get_current_token()
return result
- def process_replication(self, result):
- stream = result.get("device_lists")
- if stream:
- self._device_list_id_gen.advance(int(stream["position"]))
- for row in stream["rows"]:
- stream_id = row[0]
- user_id = row[1]
- destination = row[2]
-
+ def process_replication_rows(self, stream_name, token, rows):
+ if stream_name == "device_lists":
+ self._device_list_id_gen.advance(token)
+ for row in rows:
self._device_list_stream_cache.entity_has_changed(
- user_id, stream_id
+ row.user_id, token
)
- if destination:
+ if row.destination:
self._device_list_federation_stream_cache.entity_has_changed(
- destination, stream_id
+ row.destination, token
)
-
- return super(SlavedDeviceStore, self).process_replication(result)
+ return super(SlavedDeviceStore, self).process_replication_rows(
+ stream_name, token, rows
+ )
diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py
index d4db1e452e..fcaf58b93b 100644
--- a/synapse/replication/slave/storage/events.py
+++ b/synapse/replication/slave/storage/events.py
@@ -71,6 +71,7 @@ class SlavedEventStore(BaseSlavedStore):
# to reach inside the __dict__ to extract them.
get_rooms_for_user = RoomMemberStore.__dict__["get_rooms_for_user"]
get_users_in_room = RoomMemberStore.__dict__["get_users_in_room"]
+ get_hosts_in_room = RoomMemberStore.__dict__["get_hosts_in_room"]
get_users_who_share_room_with_user = (
RoomMemberStore.__dict__["get_users_who_share_room_with_user"]
)
@@ -101,9 +102,6 @@ class SlavedEventStore(BaseSlavedStore):
_get_state_groups_from_groups_txn = (
DataStore._get_state_groups_from_groups_txn.__func__
)
- _get_state_group_from_group = (
- StateStore.__dict__["_get_state_group_from_group"]
- )
get_recent_event_ids_for_room = (
StreamStore.__dict__["get_recent_event_ids_for_room"]
)
@@ -146,6 +144,9 @@ class SlavedEventStore(BaseSlavedStore):
RoomMemberStore.__dict__["_get_joined_users_from_context"]
)
+ get_joined_hosts = DataStore.get_joined_hosts.__func__
+ _get_joined_hosts = RoomMemberStore.__dict__["_get_joined_hosts"]
+
get_recent_events_for_room = DataStore.get_recent_events_for_room.__func__
get_room_events_stream_for_rooms = (
DataStore.get_room_events_stream_for_rooms.__func__
@@ -201,48 +202,25 @@ class SlavedEventStore(BaseSlavedStore):
result["backfill"] = -self._backfill_id_gen.get_current_token()
return result
- def process_replication(self, result):
- stream = result.get("events")
- if stream:
- self._stream_id_gen.advance(int(stream["position"]))
-
- if stream["rows"]:
- logger.info("Got %d event rows", len(stream["rows"]))
-
- for row in stream["rows"]:
- self._process_replication_row(
- row, backfilled=False,
+ def process_replication_rows(self, stream_name, token, rows):
+ if stream_name == "events":
+ self._stream_id_gen.advance(token)
+ for row in rows:
+ self.invalidate_caches_for_event(
+ token, row.event_id, row.room_id, row.type, row.state_key,
+ row.redacts,
+ backfilled=False,
)
-
- stream = result.get("backfill")
- if stream:
- self._backfill_id_gen.advance(-int(stream["position"]))
- for row in stream["rows"]:
- self._process_replication_row(
- row, backfilled=True,
+ elif stream_name == "backfill":
+ self._backfill_id_gen.advance(-token)
+ for row in rows:
+ self.invalidate_caches_for_event(
+ -token, row.event_id, row.room_id, row.type, row.state_key,
+ row.redacts,
+ backfilled=True,
)
-
- stream = result.get("forward_ex_outliers")
- if stream:
- self._stream_id_gen.advance(int(stream["position"]))
- for row in stream["rows"]:
- event_id = row[1]
- self._invalidate_get_event_cache(event_id)
-
- stream = result.get("backward_ex_outliers")
- if stream:
- self._backfill_id_gen.advance(-int(stream["position"]))
- for row in stream["rows"]:
- event_id = row[1]
- self._invalidate_get_event_cache(event_id)
-
- return super(SlavedEventStore, self).process_replication(result)
-
- def _process_replication_row(self, row, backfilled):
- stream_ordering = row[0] if not backfilled else -row[0]
- self.invalidate_caches_for_event(
- stream_ordering, row[1], row[2], row[3], row[4], row[5],
- backfilled=backfilled,
+ return super(SlavedEventStore, self).process_replication_rows(
+ stream_name, token, rows
)
def invalidate_caches_for_event(self, stream_ordering, event_id, room_id,
diff --git a/synapse/replication/slave/storage/presence.py b/synapse/replication/slave/storage/presence.py
index e4a2414d78..cfb9280181 100644
--- a/synapse/replication/slave/storage/presence.py
+++ b/synapse/replication/slave/storage/presence.py
@@ -39,6 +39,16 @@ class SlavedPresenceStore(BaseSlavedStore):
_get_presence_for_user = PresenceStore.__dict__["_get_presence_for_user"]
get_presence_for_users = PresenceStore.__dict__["get_presence_for_users"]
+ # XXX: This is a bit broken because we don't persist the accepted list in a
+ # way that can be replicated. This means that we don't have a way to
+ # invalidate the cache correctly.
+ get_presence_list_accepted = PresenceStore.__dict__[
+ "get_presence_list_accepted"
+ ]
+ get_presence_list_observers_accepted = PresenceStore.__dict__[
+ "get_presence_list_observers_accepted"
+ ]
+
def get_current_presence_token(self):
return self._presence_id_gen.get_current_token()
@@ -48,15 +58,14 @@ class SlavedPresenceStore(BaseSlavedStore):
result["presence"] = position
return result
- def process_replication(self, result):
- stream = result.get("presence")
- if stream:
- self._presence_id_gen.advance(int(stream["position"]))
- for row in stream["rows"]:
- position, user_id = row[:2]
+ def process_replication_rows(self, stream_name, token, rows):
+ if stream_name == "presence":
+ self._presence_id_gen.advance(token)
+ for row in rows:
self.presence_stream_cache.entity_has_changed(
- user_id, position
+ row.user_id, token
)
- self._get_presence_for_user.invalidate((user_id,))
-
- return super(SlavedPresenceStore, self).process_replication(result)
+ self._get_presence_for_user.invalidate((row.user_id,))
+ return super(SlavedPresenceStore, self).process_replication_rows(
+ stream_name, token, rows
+ )
diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py
index 21ceb0213a..83e880fdd2 100644
--- a/synapse/replication/slave/storage/push_rule.py
+++ b/synapse/replication/slave/storage/push_rule.py
@@ -50,18 +50,15 @@ class SlavedPushRuleStore(SlavedEventStore):
result["push_rules"] = self._push_rules_stream_id_gen.get_current_token()
return result
- def process_replication(self, result):
- stream = result.get("push_rules")
- if stream:
- for row in stream["rows"]:
- position = row[0]
- user_id = row[2]
- self.get_push_rules_for_user.invalidate((user_id,))
- self.get_push_rules_enabled_for_user.invalidate((user_id,))
+ def process_replication_rows(self, stream_name, token, rows):
+ if stream_name == "push_rules":
+ self._push_rules_stream_id_gen.advance(token)
+ for row in rows:
+ self.get_push_rules_for_user.invalidate((row.user_id,))
+ self.get_push_rules_enabled_for_user.invalidate((row.user_id,))
self.push_rules_stream_cache.entity_has_changed(
- user_id, position
+ row.user_id, token
)
-
- self._push_rules_stream_id_gen.advance(int(stream["position"]))
-
- return super(SlavedPushRuleStore, self).process_replication(result)
+ return super(SlavedPushRuleStore, self).process_replication_rows(
+ stream_name, token, rows
+ )
diff --git a/synapse/replication/slave/storage/pushers.py b/synapse/replication/slave/storage/pushers.py
index d88206b3bb..4e8d68ece9 100644
--- a/synapse/replication/slave/storage/pushers.py
+++ b/synapse/replication/slave/storage/pushers.py
@@ -40,13 +40,9 @@ class SlavedPusherStore(BaseSlavedStore):
result["pushers"] = self._pushers_id_gen.get_current_token()
return result
- def process_replication(self, result):
- stream = result.get("pushers")
- if stream:
- self._pushers_id_gen.advance(int(stream["position"]))
-
- stream = result.get("deleted_pushers")
- if stream:
- self._pushers_id_gen.advance(int(stream["position"]))
-
- return super(SlavedPusherStore, self).process_replication(result)
+ def process_replication_rows(self, stream_name, token, rows):
+ if stream_name == "pushers":
+ self._pushers_id_gen.advance(token)
+ return super(SlavedPusherStore, self).process_replication_rows(
+ stream_name, token, rows
+ )
diff --git a/synapse/replication/slave/storage/receipts.py b/synapse/replication/slave/storage/receipts.py
index ac9662d399..b371574ece 100644
--- a/synapse/replication/slave/storage/receipts.py
+++ b/synapse/replication/slave/storage/receipts.py
@@ -65,20 +65,22 @@ class SlavedReceiptsStore(BaseSlavedStore):
result["receipts"] = self._receipts_id_gen.get_current_token()
return result
- def process_replication(self, result):
- stream = result.get("receipts")
- if stream:
- self._receipts_id_gen.advance(int(stream["position"]))
- for row in stream["rows"]:
- position, room_id, receipt_type, user_id = row[:4]
- self.invalidate_caches_for_receipt(room_id, receipt_type, user_id)
- self._receipts_stream_cache.entity_has_changed(room_id, position)
-
- return super(SlavedReceiptsStore, self).process_replication(result)
-
def invalidate_caches_for_receipt(self, room_id, receipt_type, user_id):
self.get_receipts_for_user.invalidate((user_id, receipt_type))
self.get_linearized_receipts_for_room.invalidate_many((room_id,))
self.get_last_receipt_event_id_for_user.invalidate(
(user_id, room_id, receipt_type)
)
+
+ def process_replication_rows(self, stream_name, token, rows):
+ if stream_name == "receipts":
+ self._receipts_id_gen.advance(token)
+ for row in rows:
+ self.invalidate_caches_for_receipt(
+ row.room_id, row.receipt_type, row.user_id
+ )
+ self._receipts_stream_cache.entity_has_changed(row.room_id, token)
+
+ return super(SlavedReceiptsStore, self).process_replication_rows(
+ stream_name, token, rows
+ )
diff --git a/synapse/replication/slave/storage/room.py b/synapse/replication/slave/storage/room.py
index 6df9a25ef3..f510384033 100644
--- a/synapse/replication/slave/storage/room.py
+++ b/synapse/replication/slave/storage/room.py
@@ -46,9 +46,10 @@ class RoomStore(BaseSlavedStore):
result["public_rooms"] = self._public_room_id_gen.get_current_token()
return result
- def process_replication(self, result):
- stream = result.get("public_rooms")
- if stream:
- self._public_room_id_gen.advance(int(stream["position"]))
+ def process_replication_rows(self, stream_name, token, rows):
+ if stream_name == "public_rooms":
+ self._public_room_id_gen.advance(token)
- return super(RoomStore, self).process_replication(result)
+ return super(RoomStore, self).process_replication_rows(
+ stream_name, token, rows
+ )
diff --git a/synapse/replication/tcp/__init__.py b/synapse/replication/tcp/__init__.py
new file mode 100644
index 0000000000..81c2ea7ee9
--- /dev/null
+++ b/synapse/replication/tcp/__init__.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+# Copyright 2017 Vector Creations Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""This module implements the TCP replication protocol used by synapse to
+communicate between the master process and its workers (when they're enabled).
+
+Further details can be found in docs/tcp_replication.rst
+
+
+Structure of the module:
+ * client.py - the client classes used for workers to connect to master
+ * command.py - the definitions of all the valid commands
+ * protocol.py - contains bot the client and server protocol implementations,
+ these should not be used directly
+ * resource.py - the server classes that accepts and handle client connections
+ * streams.py - the definitons of all the valid streams
+
+"""
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
new file mode 100644
index 0000000000..90fb6c1336
--- /dev/null
+++ b/synapse/replication/tcp/client.py
@@ -0,0 +1,196 @@
+# -*- coding: utf-8 -*-
+# Copyright 2017 Vector Creations Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""A replication client for use by synapse workers.
+"""
+
+from twisted.internet import reactor, defer
+from twisted.internet.protocol import ReconnectingClientFactory
+
+from .commands import (
+ FederationAckCommand, UserSyncCommand, RemovePusherCommand, InvalidateCacheCommand,
+)
+from .protocol import ClientReplicationStreamProtocol
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class ReplicationClientFactory(ReconnectingClientFactory):
+ """Factory for building connections to the master. Will reconnect if the
+ connection is lost.
+
+ Accepts a handler that will be called when new data is available or data
+ is required.
+ """
+ maxDelay = 5 # Try at least once every N seconds
+
+ def __init__(self, hs, client_name, handler):
+ self.client_name = client_name
+ self.handler = handler
+ self.server_name = hs.config.server_name
+ self._clock = hs.get_clock() # As self.clock is defined in super class
+
+ reactor.addSystemEventTrigger("before", "shutdown", self.stopTrying)
+
+ def startedConnecting(self, connector):
+ logger.info("Connecting to replication: %r", connector.getDestination())
+
+ def buildProtocol(self, addr):
+ logger.info("Connected to replication: %r", addr)
+ self.resetDelay()
+ return ClientReplicationStreamProtocol(
+ self.client_name, self.server_name, self._clock, self.handler
+ )
+
+ def clientConnectionLost(self, connector, reason):
+ logger.error("Lost replication conn: %r", reason)
+ ReconnectingClientFactory.clientConnectionLost(self, connector, reason)
+
+ def clientConnectionFailed(self, connector, reason):
+ logger.error("Failed to connect to replication: %r", reason)
+ ReconnectingClientFactory.clientConnectionFailed(
+ self, connector, reason
+ )
+
+
+class ReplicationClientHandler(object):
+ """A base handler that can be passed to the ReplicationClientFactory.
+
+ By default proxies incoming replication data to the SlaveStore.
+ """
+ def __init__(self, store):
+ self.store = store
+
+ # The current connection. None if we are currently (re)connecting
+ self.connection = None
+
+ # Any pending commands to be sent once a new connection has been
+ # established
+ self.pending_commands = []
+
+ # Map from string -> deferred, to wake up when receiveing a SYNC with
+ # the given string.
+ # Used for tests.
+ self.awaiting_syncs = {}
+
+ def start_replication(self, hs):
+ """Helper method to start a replication connection to the remote server
+ using TCP.
+ """
+ client_name = hs.config.worker_name
+ factory = ReplicationClientFactory(hs, client_name, self)
+ host = hs.config.worker_replication_host
+ port = hs.config.worker_replication_port
+ reactor.connectTCP(host, port, factory)
+
+ def on_rdata(self, stream_name, token, rows):
+ """Called when we get new replication data. By default this just pokes
+ the slave store.
+
+ Can be overriden in subclasses to handle more.
+ """
+ logger.info("Received rdata %s -> %s", stream_name, token)
+ self.store.process_replication_rows(stream_name, token, rows)
+
+ def on_position(self, stream_name, token):
+ """Called when we get new position data. By default this just pokes
+ the slave store.
+
+ Can be overriden in subclasses to handle more.
+ """
+ self.store.process_replication_rows(stream_name, token, [])
+
+ def on_sync(self, data):
+ """When we received a SYNC we wake up any deferreds that were waiting
+ for the sync with the given data.
+
+ Used by tests.
+ """
+ d = self.awaiting_syncs.pop(data, None)
+ if d:
+ d.callback(data)
+
+ def get_streams_to_replicate(self):
+ """Called when a new connection has been established and we need to
+ subscribe to streams.
+
+ Returns a dictionary of stream name to token.
+ """
+ args = self.store.stream_positions()
+ user_account_data = args.pop("user_account_data", None)
+ room_account_data = args.pop("room_account_data", None)
+ if user_account_data:
+ args["account_data"] = user_account_data
+ elif room_account_data:
+ args["account_data"] = room_account_data
+ return args
+
+ def get_currently_syncing_users(self):
+ """Get the list of currently syncing users (if any). This is called
+ when a connection has been established and we need to send the
+ currently syncing users. (Overriden by the synchrotron's only)
+ """
+ return []
+
+ def send_command(self, cmd):
+ """Send a command to master (when we get establish a connection if we
+ don't have one already.)
+ """
+ if self.connection:
+ self.connection.send_command(cmd)
+ else:
+ logger.warn("Queuing command as not connected: %r", cmd.NAME)
+ self.pending_commands.append(cmd)
+
+ def send_federation_ack(self, token):
+ """Ack data for the federation stream. This allows the master to drop
+ data stored purely in memory.
+ """
+ self.send_command(FederationAckCommand(token))
+
+ def send_user_sync(self, user_id, is_syncing, last_sync_ms):
+ """Poke the master that a user has started/stopped syncing.
+ """
+ self.send_command(UserSyncCommand(user_id, is_syncing, last_sync_ms))
+
+ def send_remove_pusher(self, app_id, push_key, user_id):
+ """Poke the master to remove a pusher for a user
+ """
+ cmd = RemovePusherCommand(app_id, push_key, user_id)
+ self.send_command(cmd)
+
+ def send_invalidate_cache(self, cache_func, keys):
+ """Poke the master to invalidate a cache.
+ """
+ cmd = InvalidateCacheCommand(cache_func.__name__, keys)
+ self.send_command(cmd)
+
+ def await_sync(self, data):
+ """Returns a deferred that is resolved when we receive a SYNC command
+ with given data.
+
+ Used by tests.
+ """
+ return self.awaiting_syncs.setdefault(data, defer.Deferred())
+
+ def update_connection(self, connection):
+ """Called when a connection has been established (or lost with None).
+ """
+ self.connection = connection
+ if connection:
+ for cmd in self.pending_commands:
+ connection.send_command(cmd)
+ self.pending_commands = []
diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py
new file mode 100644
index 0000000000..84d2a2272a
--- /dev/null
+++ b/synapse/replication/tcp/commands.py
@@ -0,0 +1,346 @@
+# -*- coding: utf-8 -*-
+# Copyright 2017 Vector Creations Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Defines the various valid commands
+
+The VALID_SERVER_COMMANDS and VALID_CLIENT_COMMANDS define which commands are
+allowed to be sent by which side.
+"""
+
+import logging
+import ujson as json
+
+
+logger = logging.getLogger(__name__)
+
+
+class Command(object):
+ """The base command class.
+
+ All subclasses must set the NAME variable which equates to the name of the
+ command on the wire.
+
+ A full command line on the wire is constructed from `NAME + " " + to_line()`
+
+ The default implementation creates a command of form `<NAME> <data>`
+ """
+ NAME = None
+
+ def __init__(self, data):
+ self.data = data
+
+ @classmethod
+ def from_line(cls, line):
+ """Deserialises a line from the wire into this command. `line` does not
+ include the command.
+ """
+ return cls(line)
+
+ def to_line(self):
+ """Serialises the comamnd for the wire. Does not include the command
+ prefix.
+ """
+ return self.data
+
+
+class ServerCommand(Command):
+ """Sent by the server on new connection and includes the server_name.
+
+ Format::
+
+ SERVER <server_name>
+ """
+ NAME = "SERVER"
+
+
+class RdataCommand(Command):
+ """Sent by server when a subscribed stream has an update.
+
+ Format::
+
+ RDATA <stream_name> <token> <row_json>
+
+ The `<token>` may either be a numeric stream id OR "batch". The latter case
+ is used to support sending multiple updates with the same stream ID. This
+ is done by sending an RDATA for each row, with all but the last RDATA having
+ a token of "batch" and the last having the final stream ID.
+
+ The client should batch all incoming RDATA with a token of "batch" (per
+ stream_name) until it sees an RDATA with a numeric stream ID.
+
+ `<token>` of "batch" maps to the instance variable `token` being None.
+
+ An example of a batched series of RDATA::
+
+ RDATA presence batch ["@foo:example.com", "online", ...]
+ RDATA presence batch ["@bar:example.com", "online", ...]
+ RDATA presence 59 ["@baz:example.com", "online", ...]
+ """
+ NAME = "RDATA"
+
+ def __init__(self, stream_name, token, row):
+ self.stream_name = stream_name
+ self.token = token
+ self.row = row
+
+ @classmethod
+ def from_line(cls, line):
+ stream_name, token, row_json = line.split(" ", 2)
+ return cls(
+ stream_name,
+ None if token == "batch" else int(token),
+ json.loads(row_json)
+ )
+
+ def to_line(self):
+ return " ".join((
+ self.stream_name,
+ str(self.token) if self.token is not None else "batch",
+ json.dumps(self.row),
+ ))
+
+
+class PositionCommand(Command):
+ """Sent by the client to tell the client the stream postition without
+ needing to send an RDATA.
+ """
+ NAME = "POSITION"
+
+ def __init__(self, stream_name, token):
+ self.stream_name = stream_name
+ self.token = token
+
+ @classmethod
+ def from_line(cls, line):
+ stream_name, token = line.split(" ", 1)
+ return cls(stream_name, int(token))
+
+ def to_line(self):
+ return " ".join((self.stream_name, str(self.token),))
+
+
+class ErrorCommand(Command):
+ """Sent by either side if there was an ERROR. The data is a string describing
+ the error.
+ """
+ NAME = "ERROR"
+
+
+class PingCommand(Command):
+ """Sent by either side as a keep alive. The data is arbitary (often timestamp)
+ """
+ NAME = "PING"
+
+
+class NameCommand(Command):
+ """Sent by client to inform the server of the client's identity. The data
+ is the name
+ """
+ NAME = "NAME"
+
+
+class ReplicateCommand(Command):
+ """Sent by the client to subscribe to the stream.
+
+ Format::
+
+ REPLICATE <stream_name> <token>
+
+ Where <token> may be either:
+ * a numeric stream_id to stream updates from
+ * "NOW" to stream all subsequent updates.
+
+ The <stream_name> can be "ALL" to subscribe to all known streams, in which
+ case the <token> must be set to "NOW", i.e.::
+
+ REPLICATE ALL NOW
+ """
+ NAME = "REPLICATE"
+
+ def __init__(self, stream_name, token):
+ self.stream_name = stream_name
+ self.token = token
+
+ @classmethod
+ def from_line(cls, line):
+ stream_name, token = line.split(" ", 1)
+ if token in ("NOW", "now"):
+ token = "NOW"
+ else:
+ token = int(token)
+ return cls(stream_name, token)
+
+ def to_line(self):
+ return " ".join((self.stream_name, str(self.token),))
+
+
+class UserSyncCommand(Command):
+ """Sent by the client to inform the server that a user has started or
+ stopped syncing. Used to calculate presence on the master.
+
+ Includes a timestamp of when the last user sync was.
+
+ Format::
+
+ USER_SYNC <user_id> <state> <last_sync_ms>
+
+ Where <state> is either "start" or "stop"
+ """
+ NAME = "USER_SYNC"
+
+ def __init__(self, user_id, is_syncing, last_sync_ms):
+ self.user_id = user_id
+ self.is_syncing = is_syncing
+ self.last_sync_ms = last_sync_ms
+
+ @classmethod
+ def from_line(cls, line):
+ user_id, state, last_sync_ms = line.split(" ", 2)
+
+ if state not in ("start", "end"):
+ raise Exception("Invalid USER_SYNC state %r" % (state,))
+
+ return cls(user_id, state == "start", int(last_sync_ms))
+
+ def to_line(self):
+ return " ".join((
+ self.user_id, "start" if self.is_syncing else "end", str(self.last_sync_ms),
+ ))
+
+
+class FederationAckCommand(Command):
+ """Sent by the client when it has processed up to a given point in the
+ federation stream. This allows the master to drop in-memory caches of the
+ federation stream.
+
+ This must only be sent from one worker (i.e. the one sending federation)
+
+ Format::
+
+ FEDERATION_ACK <token>
+ """
+ NAME = "FEDERATION_ACK"
+
+ def __init__(self, token):
+ self.token = token
+
+ @classmethod
+ def from_line(cls, line):
+ return cls(int(line))
+
+ def to_line(self):
+ return str(self.token)
+
+
+class SyncCommand(Command):
+ """Used for testing. The client protocol implementation allows waiting
+ on a SYNC command with a specified data.
+ """
+ NAME = "SYNC"
+
+
+class RemovePusherCommand(Command):
+ """Sent by the client to request the master remove the given pusher.
+
+ Format::
+
+ REMOVE_PUSHER <app_id> <push_key> <user_id>
+ """
+ NAME = "REMOVE_PUSHER"
+
+ def __init__(self, app_id, push_key, user_id):
+ self.user_id = user_id
+ self.app_id = app_id
+ self.push_key = push_key
+
+ @classmethod
+ def from_line(cls, line):
+ app_id, push_key, user_id = line.split(" ", 2)
+
+ return cls(app_id, push_key, user_id)
+
+ def to_line(self):
+ return " ".join((self.app_id, self.push_key, self.user_id))
+
+
+class InvalidateCacheCommand(Command):
+ """Sent by the client to invalidate an upstream cache.
+
+ THIS IS NOT RELIABLE, AND SHOULD *NOT* BE USED ACCEPT FOR THINGS THAT ARE
+ NOT DISASTROUS IF WE DROP ON THE FLOOR.
+
+ Mainly used to invalidate destination retry timing caches.
+
+ Format::
+
+ INVALIDATE_CACHE <cache_func> <keys_json>
+
+ Where <keys_json> is a json list.
+ """
+ NAME = "INVALIDATE_CACHE"
+
+ def __init__(self, cache_func, keys):
+ self.cache_func = cache_func
+ self.keys = keys
+
+ @classmethod
+ def from_line(cls, line):
+ cache_func, keys_json = line.split(" ", 1)
+
+ return cls(cache_func, json.loads(keys_json))
+
+ def to_line(self):
+ return " ".join((self.cache_func, json.dumps(self.keys)))
+
+
+# Map of command name to command type.
+COMMAND_MAP = {
+ cmd.NAME: cmd
+ for cmd in (
+ ServerCommand,
+ RdataCommand,
+ PositionCommand,
+ ErrorCommand,
+ PingCommand,
+ NameCommand,
+ ReplicateCommand,
+ UserSyncCommand,
+ FederationAckCommand,
+ SyncCommand,
+ RemovePusherCommand,
+ InvalidateCacheCommand,
+ )
+}
+
+# The commands the server is allowed to send
+VALID_SERVER_COMMANDS = (
+ ServerCommand.NAME,
+ RdataCommand.NAME,
+ PositionCommand.NAME,
+ ErrorCommand.NAME,
+ PingCommand.NAME,
+ SyncCommand.NAME,
+)
+
+# The commands the client is allowed to send
+VALID_CLIENT_COMMANDS = (
+ NameCommand.NAME,
+ ReplicateCommand.NAME,
+ PingCommand.NAME,
+ UserSyncCommand.NAME,
+ FederationAckCommand.NAME,
+ RemovePusherCommand.NAME,
+ InvalidateCacheCommand.NAME,
+ ErrorCommand.NAME,
+)
diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
new file mode 100644
index 0000000000..9fee2a484b
--- /dev/null
+++ b/synapse/replication/tcp/protocol.py
@@ -0,0 +1,640 @@
+# -*- coding: utf-8 -*-
+# Copyright 2017 Vector Creations Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This module contains the implementation of both the client and server
+protocols.
+
+The basic structure of the protocol is line based, where the initial word of
+each line specifies the command. The rest of the line is parsed based on the
+command. For example, the `RDATA` command is defined as::
+
+ RDATA <stream_name> <token> <row_json>
+
+(Note that `<row_json>` may contains spaces, but cannot contain newlines.)
+
+Blank lines are ignored.
+
+# Example
+
+An example iteraction is shown below. Each line is prefixed with '>' or '<' to
+indicate which side is sending, these are *not* included on the wire::
+
+ * connection established *
+ > SERVER localhost:8823
+ > PING 1490197665618
+ < NAME synapse.app.appservice
+ < PING 1490197665618
+ < REPLICATE events 1
+ < REPLICATE backfill 1
+ < REPLICATE caches 1
+ > POSITION events 1
+ > POSITION backfill 1
+ > POSITION caches 1
+ > RDATA caches 2 ["get_user_by_id",["@01register-user:localhost:8823"],1490197670513]
+ > RDATA events 14 ["$149019767112vOHxz:localhost:8823",
+ "!AFDCvgApUmpdfVjIXm:localhost:8823","m.room.guest_access","",null]
+ < PING 1490197675618
+ > ERROR server stopping
+ * connection closed by server *
+"""
+
+from twisted.internet import defer
+from twisted.protocols.basic import LineOnlyReceiver
+from twisted.python.failure import Failure
+
+from commands import (
+ COMMAND_MAP, VALID_CLIENT_COMMANDS, VALID_SERVER_COMMANDS,
+ ErrorCommand, ServerCommand, RdataCommand, PositionCommand, PingCommand,
+ NameCommand, ReplicateCommand, UserSyncCommand, SyncCommand,
+)
+from streams import STREAMS_MAP
+
+from synapse.util.stringutils import random_string
+from synapse.metrics.metric import CounterMetric
+
+import logging
+import synapse.metrics
+import struct
+import fcntl
+
+
+metrics = synapse.metrics.get_metrics_for(__name__)
+
+connection_close_counter = metrics.register_counter(
+ "close_reason", labels=["reason_type"],
+)
+
+
+# A list of all connected protocols. This allows us to send metrics about the
+# connections.
+connected_connections = []
+
+
+logger = logging.getLogger(__name__)
+
+
+PING_TIME = 5000
+PING_TIMEOUT_MULTIPLIER = 5
+PING_TIMEOUT_MS = PING_TIME * PING_TIMEOUT_MULTIPLIER
+
+
+class ConnectionStates(object):
+ CONNECTING = "connecting"
+ ESTABLISHED = "established"
+ PAUSED = "paused"
+ CLOSED = "closed"
+
+
+class BaseReplicationStreamProtocol(LineOnlyReceiver):
+ """Base replication protocol shared between client and server.
+
+ Reads lines (ignoring blank ones) and parses them into command classes,
+ asserting that they are valid for the given direction, i.e. server commands
+ are only sent by the server.
+
+ On receiving a new command it calls `on_<COMMAND_NAME>` with the parsed
+ command.
+
+ It also sends `PING` periodically, and correctly times out remote connections
+ (if they send a `PING` command)
+ """
+ delimiter = b'\n'
+
+ VALID_INBOUND_COMMANDS = [] # Valid commands we expect to receive
+ VALID_OUTBOUND_COMMANDS = [] # Valid commans we can send
+
+ max_line_buffer = 10000
+
+ def __init__(self, clock):
+ self.clock = clock
+
+ self.last_received_command = self.clock.time_msec()
+ self.last_sent_command = 0
+ self.time_we_closed = None # When we requested the connection be closed
+
+ self.received_ping = False # Have we reecived a ping from the other side
+
+ self.state = ConnectionStates.CONNECTING
+
+ self.name = "anon" # The name sent by a client.
+ self.conn_id = random_string(5) # To dedupe in case of name clashes.
+
+ # List of pending commands to send once we've established the connection
+ self.pending_commands = []
+
+ # The LoopingCall for sending pings.
+ self._send_ping_loop = None
+
+ self.inbound_commands_counter = CounterMetric(
+ "inbound_commands", labels=["command"],
+ )
+ self.outbound_commands_counter = CounterMetric(
+ "outbound_commands", labels=["command"],
+ )
+
+ def connectionMade(self):
+ logger.info("[%s] Connection established", self.id())
+
+ self.state = ConnectionStates.ESTABLISHED
+
+ connected_connections.append(self) # Register connection for metrics
+
+ self.transport.registerProducer(self, True) # For the *Producing callbacks
+
+ self._send_pending_commands()
+
+ # Starts sending pings
+ self._send_ping_loop = self.clock.looping_call(self.send_ping, 5000)
+
+ # Always send the initial PING so that the other side knows that they
+ # can time us out.
+ self.send_command(PingCommand(self.clock.time_msec()))
+
+ def send_ping(self):
+ """Periodically sends a ping and checks if we should close the connection
+ due to the other side timing out.
+ """
+ now = self.clock.time_msec()
+
+ if self.time_we_closed:
+ if now - self.time_we_closed > PING_TIMEOUT_MS:
+ logger.info(
+ "[%s] Failed to close connection gracefully, aborting", self.id()
+ )
+ self.transport.abortConnection()
+ else:
+ if now - self.last_sent_command >= PING_TIME:
+ self.send_command(PingCommand(now))
+
+ if self.received_ping and now - self.last_received_command > PING_TIMEOUT_MS:
+ logger.info(
+ "[%s] Connection hasn't received command in %r ms. Closing.",
+ self.id(), now - self.last_received_command
+ )
+ self.send_error("ping timeout")
+
+ def lineReceived(self, line):
+ """Called when we've received a line
+ """
+ if line.strip() == "":
+ # Ignore blank lines
+ return
+
+ line = line.decode("utf-8")
+ cmd_name, rest_of_line = line.split(" ", 1)
+
+ if cmd_name not in self.VALID_INBOUND_COMMANDS:
+ logger.error("[%s] invalid command %s", self.id(), cmd_name)
+ self.send_error("invalid command: %s", cmd_name)
+ return
+
+ self.last_received_command = self.clock.time_msec()
+
+ self.inbound_commands_counter.inc(cmd_name)
+
+ cmd_cls = COMMAND_MAP[cmd_name]
+ try:
+ cmd = cmd_cls.from_line(rest_of_line)
+ except Exception as e:
+ logger.exception(
+ "[%s] failed to parse line %r: %r", self.id(), cmd_name, rest_of_line
+ )
+ self.send_error(
+ "failed to parse line for %r: %r (%r):" % (cmd_name, e, rest_of_line)
+ )
+ return
+
+ # Now lets try and call on_<CMD_NAME> function
+ try:
+ getattr(self, "on_%s" % (cmd_name,))(cmd)
+ except Exception:
+ logger.exception("[%s] Failed to handle line: %r", self.id(), line)
+
+ def close(self):
+ logger.warn("[%s] Closing connection", self.id())
+ self.time_we_closed = self.clock.time_msec()
+ self.transport.loseConnection()
+ self.on_connection_closed()
+
+ def send_error(self, error_string, *args):
+ """Send an error to remote and close the connection.
+ """
+ self.send_command(ErrorCommand(error_string % args))
+ self.close()
+
+ def send_command(self, cmd, do_buffer=True):
+ """Send a command if connection has been established.
+
+ Args:
+ cmd (Command)
+ do_buffer (bool): Whether to buffer the message or always attempt
+ to send the command. This is mostly used to send an error
+ message if we're about to close the connection due our buffers
+ becoming full.
+ """
+ if self.state == ConnectionStates.CLOSED:
+ logger.info("[%s] Not sending, connection closed", self.id())
+ return
+
+ if do_buffer and self.state != ConnectionStates.ESTABLISHED:
+ self._queue_command(cmd)
+ return
+
+ self.outbound_commands_counter.inc(cmd.NAME)
+
+ string = "%s %s" % (cmd.NAME, cmd.to_line(),)
+ if "\n" in string:
+ raise Exception("Unexpected newline in command: %r", string)
+
+ self.sendLine(string.encode("utf-8"))
+
+ self.last_sent_command = self.clock.time_msec()
+
+ def _queue_command(self, cmd):
+ """Queue the command until the connection is ready to write to again.
+ """
+ logger.info("[%s] Queing as conn %r, cmd: %r", self.id(), self.state, cmd)
+ self.pending_commands.append(cmd)
+
+ if len(self.pending_commands) > self.max_line_buffer:
+ # The other side is failing to keep up and out buffers are becoming
+ # full, so lets close the connection.
+ # XXX: should we squawk more loudly?
+ logger.error("[%s] Remote failed to keep up", self.id())
+ self.send_command(ErrorCommand("Failed to keep up"), do_buffer=False)
+ self.close()
+
+ def _send_pending_commands(self):
+ """Send any queued commandes
+ """
+ pending = self.pending_commands
+ self.pending_commands = []
+ for cmd in pending:
+ self.send_command(cmd)
+
+ def on_PING(self, line):
+ self.received_ping = True
+
+ def on_ERROR(self, cmd):
+ logger.error("[%s] Remote reported error: %r", self.id(), cmd.data)
+
+ def pauseProducing(self):
+ """This is called when both the kernel send buffer and the twisted
+ tcp connection send buffers have become full.
+
+ We don't actually have any control over those sizes, so we buffer some
+ commands ourselves before knifing the connection due to the remote
+ failing to keep up.
+ """
+ logger.info("[%s] Pause producing", self.id())
+ self.state = ConnectionStates.PAUSED
+
+ def resumeProducing(self):
+ """The remote has caught up after we started buffering!
+ """
+ logger.info("[%s] Resume producing", self.id())
+ self.state = ConnectionStates.ESTABLISHED
+ self._send_pending_commands()
+
+ def stopProducing(self):
+ """We're never going to send any more data (normally because either
+ we or the remote has closed the connection)
+ """
+ logger.info("[%s] Stop producing", self.id())
+ self.on_connection_closed()
+
+ def connectionLost(self, reason):
+ logger.info("[%s] Replication connection closed: %r", self.id(), reason)
+ if isinstance(reason, Failure):
+ connection_close_counter.inc(reason.type.__name__)
+ else:
+ connection_close_counter.inc(reason.__class__.__name__)
+
+ try:
+ # Remove us from list of connections to be monitored
+ connected_connections.remove(self)
+ except ValueError:
+ pass
+
+ # Stop the looping call sending pings.
+ if self._send_ping_loop and self._send_ping_loop.running:
+ self._send_ping_loop.stop()
+
+ self.on_connection_closed()
+
+ def on_connection_closed(self):
+ logger.info("[%s] Connection was closed", self.id())
+
+ self.state = ConnectionStates.CLOSED
+ self.pending_commands = []
+
+ if self.transport:
+ self.transport.unregisterProducer()
+
+ def __str__(self):
+ return "ReplicationConnection<name=%s,conn_id=%s,addr=%s>" % (
+ self.name, self.conn_id, self.addr,
+ )
+
+ def id(self):
+ return "%s-%s" % (self.name, self.conn_id)
+
+
+class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol):
+ VALID_INBOUND_COMMANDS = VALID_CLIENT_COMMANDS
+ VALID_OUTBOUND_COMMANDS = VALID_SERVER_COMMANDS
+
+ def __init__(self, server_name, clock, streamer, addr):
+ BaseReplicationStreamProtocol.__init__(self, clock) # Old style class
+
+ self.server_name = server_name
+ self.streamer = streamer
+ self.addr = addr
+
+ # The streams the client has subscribed to and is up to date with
+ self.replication_streams = set()
+
+ # The streams the client is currently subscribing to.
+ self.connecting_streams = set()
+
+ # Map from stream name to list of updates to send once we've finished
+ # subscribing the client to the stream.
+ self.pending_rdata = {}
+
+ def connectionMade(self):
+ self.send_command(ServerCommand(self.server_name))
+ BaseReplicationStreamProtocol.connectionMade(self)
+ self.streamer.new_connection(self)
+
+ def on_NAME(self, cmd):
+ logger.info("[%s] Renamed to %r", self.id(), cmd.data)
+ self.name = cmd.data
+
+ def on_USER_SYNC(self, cmd):
+ self.streamer.on_user_sync(
+ self.conn_id, cmd.user_id, cmd.is_syncing, cmd.last_sync_ms,
+ )
+
+ def on_REPLICATE(self, cmd):
+ stream_name = cmd.stream_name
+ token = cmd.token
+
+ if stream_name == "ALL":
+ # Subscribe to all streams we're publishing to.
+ for stream in self.streamer.streams_by_name.iterkeys():
+ self.subscribe_to_stream(stream, token)
+ else:
+ self.subscribe_to_stream(stream_name, token)
+
+ def on_FEDERATION_ACK(self, cmd):
+ self.streamer.federation_ack(cmd.token)
+
+ def on_REMOVE_PUSHER(self, cmd):
+ self.streamer.on_remove_pusher(cmd.app_id, cmd.push_key, cmd.user_id)
+
+ def on_INVALIDATE_CACHE(self, cmd):
+ self.streamer.on_invalidate_cache(cmd.cache_func, cmd.keys)
+
+ @defer.inlineCallbacks
+ def subscribe_to_stream(self, stream_name, token):
+ """Subscribe the remote to a streams.
+
+ This invloves checking if they've missed anything and sending those
+ updates down if they have. During that time new updates for the stream
+ are queued and sent once we've sent down any missed updates.
+ """
+ self.replication_streams.discard(stream_name)
+ self.connecting_streams.add(stream_name)
+
+ try:
+ # Get missing updates
+ updates, current_token = yield self.streamer.get_stream_updates(
+ stream_name, token,
+ )
+
+ # Send all the missing updates
+ for update in updates:
+ token, row = update[0], update[1]
+ self.send_command(RdataCommand(stream_name, token, row))
+
+ # We send a POSITION command to ensure that they have an up to
+ # date token (especially useful if we didn't send any updates
+ # above)
+ self.send_command(PositionCommand(stream_name, current_token))
+
+ # Now we can send any updates that came in while we were subscribing
+ pending_rdata = self.pending_rdata.pop(stream_name, [])
+ for token, update in pending_rdata:
+ # Only send updates newer than the current token
+ if token > current_token:
+ self.send_command(RdataCommand(stream_name, token, update))
+
+ # They're now fully subscribed
+ self.replication_streams.add(stream_name)
+ except Exception as e:
+ logger.exception("[%s] Failed to handle REPLICATE command", self.id())
+ self.send_error("failed to handle replicate: %r", e)
+ finally:
+ self.connecting_streams.discard(stream_name)
+
+ def stream_update(self, stream_name, token, data):
+ """Called when a new update is available to stream to clients.
+
+ We need to check if the client is interested in the stream or not
+ """
+ if stream_name in self.replication_streams:
+ # The client is subscribed to the stream
+ self.send_command(RdataCommand(stream_name, token, data))
+ elif stream_name in self.connecting_streams:
+ # The client is being subscribed to the stream
+ logger.debug("[%s] Queuing RDATA %r %r", self.id(), stream_name, token)
+ self.pending_rdata.setdefault(stream_name, []).append((token, data))
+ else:
+ # The client isn't subscribed
+ logger.debug("[%s] Dropping RDATA %r %r", self.id(), stream_name, token)
+
+ def send_sync(self, data):
+ self.send_command(SyncCommand(data))
+
+ def on_connection_closed(self):
+ BaseReplicationStreamProtocol.on_connection_closed(self)
+ self.streamer.lost_connection(self)
+
+
+class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol):
+ VALID_INBOUND_COMMANDS = VALID_SERVER_COMMANDS
+ VALID_OUTBOUND_COMMANDS = VALID_CLIENT_COMMANDS
+
+ def __init__(self, client_name, server_name, clock, handler):
+ BaseReplicationStreamProtocol.__init__(self, clock)
+
+ self.client_name = client_name
+ self.server_name = server_name
+ self.handler = handler
+
+ # Map of stream to batched updates. See RdataCommand for info on how
+ # batching works.
+ self.pending_batches = {}
+
+ def connectionMade(self):
+ self.send_command(NameCommand(self.client_name))
+ BaseReplicationStreamProtocol.connectionMade(self)
+
+ # Once we've connected subscribe to the necessary streams
+ for stream_name, token in self.handler.get_streams_to_replicate().iteritems():
+ self.replicate(stream_name, token)
+
+ # Tell the server if we have any users currently syncing (should only
+ # happen on synchrotrons)
+ currently_syncing = self.handler.get_currently_syncing_users()
+ now = self.clock.time_msec()
+ for user_id in currently_syncing:
+ self.send_command(UserSyncCommand(user_id, True, now))
+
+ # We've now finished connecting to so inform the client handler
+ self.handler.update_connection(self)
+
+ def on_SERVER(self, cmd):
+ if cmd.data != self.server_name:
+ logger.error("[%s] Connected to wrong remote: %r", self.id(), cmd.data)
+ self.send_error("Wrong remote")
+
+ def on_RDATA(self, cmd):
+ try:
+ row = STREAMS_MAP[cmd.stream_name].ROW_TYPE(*cmd.row)
+ except Exception:
+ logger.exception(
+ "[%s] Failed to parse RDATA: %r %r",
+ self.id(), cmd.stream_name, cmd.row
+ )
+ raise
+
+ if cmd.token is None:
+ # I.e. this is part of a batch of updates for this stream. Batch
+ # until we get an update for the stream with a non None token
+ self.pending_batches.setdefault(cmd.stream_name, []).append(row)
+ else:
+ # Check if this is the last of a batch of updates
+ rows = self.pending_batches.pop(cmd.stream_name, [])
+ rows.append(row)
+
+ self.handler.on_rdata(cmd.stream_name, cmd.token, rows)
+
+ def on_POSITION(self, cmd):
+ self.handler.on_position(cmd.stream_name, cmd.token)
+
+ def on_SYNC(self, cmd):
+ self.handler.on_sync(cmd.data)
+
+ def replicate(self, stream_name, token):
+ """Send the subscription request to the server
+ """
+ if stream_name not in STREAMS_MAP:
+ raise Exception("Invalid stream name %r" % (stream_name,))
+
+ logger.info(
+ "[%s] Subscribing to replication stream: %r from %r",
+ self.id(), stream_name, token
+ )
+
+ self.send_command(ReplicateCommand(stream_name, token))
+
+ def on_connection_closed(self):
+ BaseReplicationStreamProtocol.on_connection_closed(self)
+ self.handler.update_connection(None)
+
+
+# The following simply registers metrics for the replication connections
+
+metrics.register_callback(
+ "pending_commands",
+ lambda: {
+ (p.name, p.conn_id): len(p.pending_commands)
+ for p in connected_connections
+ },
+ labels=["name", "conn_id"],
+)
+
+
+def transport_buffer_size(protocol):
+ if protocol.transport:
+ size = len(protocol.transport.dataBuffer) + protocol.transport._tempDataLen
+ return size
+ return 0
+
+
+metrics.register_callback(
+ "transport_send_buffer",
+ lambda: {
+ (p.name, p.conn_id): transport_buffer_size(p)
+ for p in connected_connections
+ },
+ labels=["name", "conn_id"],
+)
+
+
+def transport_kernel_read_buffer_size(protocol, read=True):
+ SIOCINQ = 0x541B
+ SIOCOUTQ = 0x5411
+
+ if protocol.transport:
+ fileno = protocol.transport.getHandle().fileno()
+ if read:
+ op = SIOCINQ
+ else:
+ op = SIOCOUTQ
+ size = struct.unpack("I", fcntl.ioctl(fileno, op, '\0\0\0\0'))[0]
+ return size
+ return 0
+
+
+metrics.register_callback(
+ "transport_kernel_send_buffer",
+ lambda: {
+ (p.name, p.conn_id): transport_kernel_read_buffer_size(p, False)
+ for p in connected_connections
+ },
+ labels=["name", "conn_id"],
+)
+
+
+metrics.register_callback(
+ "transport_kernel_read_buffer",
+ lambda: {
+ (p.name, p.conn_id): transport_kernel_read_buffer_size(p, True)
+ for p in connected_connections
+ },
+ labels=["name", "conn_id"],
+)
+
+
+metrics.register_callback(
+ "inbound_commands",
+ lambda: {
+ (k[0], p.name, p.conn_id): count
+ for p in connected_connections
+ for k, count in p.inbound_commands_counter.counts.iteritems()
+ },
+ labels=["command", "name", "conn_id"],
+)
+
+metrics.register_callback(
+ "outbound_commands",
+ lambda: {
+ (k[0], p.name, p.conn_id): count
+ for p in connected_connections
+ for k, count in p.outbound_commands_counter.counts.iteritems()
+ },
+ labels=["command", "name", "conn_id"],
+)
diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py
new file mode 100644
index 0000000000..8b2c4c3043
--- /dev/null
+++ b/synapse/replication/tcp/resource.py
@@ -0,0 +1,290 @@
+# -*- coding: utf-8 -*-
+# Copyright 2017 Vector Creations Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The server side of the replication stream.
+"""
+
+from twisted.internet import defer, reactor
+from twisted.internet.protocol import Factory
+
+from streams import STREAMS_MAP, FederationStream
+from protocol import ServerReplicationStreamProtocol
+
+from synapse.util.metrics import Measure, measure_func
+
+import logging
+import synapse.metrics
+
+
+metrics = synapse.metrics.get_metrics_for(__name__)
+stream_updates_counter = metrics.register_counter(
+ "stream_updates", labels=["stream_name"]
+)
+user_sync_counter = metrics.register_counter("user_sync")
+federation_ack_counter = metrics.register_counter("federation_ack")
+remove_pusher_counter = metrics.register_counter("remove_pusher")
+invalidate_cache_counter = metrics.register_counter("invalidate_cache")
+
+logger = logging.getLogger(__name__)
+
+
+class ReplicationStreamProtocolFactory(Factory):
+ """Factory for new replication connections.
+ """
+ def __init__(self, hs):
+ self.streamer = ReplicationStreamer(hs)
+ self.clock = hs.get_clock()
+ self.server_name = hs.config.server_name
+
+ def buildProtocol(self, addr):
+ return ServerReplicationStreamProtocol(
+ self.server_name,
+ self.clock,
+ self.streamer,
+ addr
+ )
+
+
+class ReplicationStreamer(object):
+ """Handles replication connections.
+
+ This needs to be poked when new replication data may be available. When new
+ data is available it will propagate to all connected clients.
+ """
+
+ def __init__(self, hs):
+ self.store = hs.get_datastore()
+ self.presence_handler = hs.get_presence_handler()
+ self.clock = hs.get_clock()
+
+ # Current connections.
+ self.connections = []
+
+ metrics.register_callback("total_connections", lambda: len(self.connections))
+
+ # List of streams that clients can subscribe to.
+ # We only support federation stream if federation sending hase been
+ # disabled on the master.
+ self.streams = [
+ stream(hs) for stream in STREAMS_MAP.itervalues()
+ if stream != FederationStream or not hs.config.send_federation
+ ]
+
+ self.streams_by_name = {stream.NAME: stream for stream in self.streams}
+
+ metrics.register_callback(
+ "connections_per_stream",
+ lambda: {
+ (stream_name,): len([
+ conn for conn in self.connections
+ if stream_name in conn.replication_streams
+ ])
+ for stream_name in self.streams_by_name
+ },
+ labels=["stream_name"],
+ )
+
+ self.federation_sender = None
+ if not hs.config.send_federation:
+ self.federation_sender = hs.get_federation_sender()
+
+ hs.get_notifier().add_replication_callback(self.on_notifier_poke)
+
+ # Keeps track of whether we are currently checking for updates
+ self.is_looping = False
+ self.pending_updates = False
+
+ reactor.addSystemEventTrigger("before", "shutdown", self.on_shutdown)
+
+ def on_shutdown(self):
+ # close all connections on shutdown
+ for conn in self.connections:
+ conn.send_error("server shutting down")
+
+ @defer.inlineCallbacks
+ def on_notifier_poke(self):
+ """Checks if there is actually any new data and sends it to the
+ connections if there are.
+
+ This should get called each time new data is available, even if it
+ is currently being executed, so that nothing gets missed
+ """
+ if not self.connections:
+ # Don't bother if nothing is listening. We still need to advance
+ # the stream tokens otherwise they'll fall beihind forever
+ for stream in self.streams:
+ stream.discard_updates_and_advance()
+ return
+
+ # If we're in the process of checking for new updates, mark that fact
+ # and return
+ if self.is_looping:
+ logger.debug("Noitifier poke loop already running")
+ self.pending_updates = True
+ return
+
+ self.pending_updates = True
+ self.is_looping = True
+
+ try:
+ # Keep looping while there have been pokes about potential updates.
+ # This protects against the race where a stream we already checked
+ # gets an update while we're handling other streams.
+ while self.pending_updates:
+ self.pending_updates = False
+
+ with Measure(self.clock, "repl.stream.get_updates"):
+ # First we tell the streams that they should update their
+ # current tokens.
+ for stream in self.streams:
+ stream.advance_current_token()
+
+ for stream in self.streams:
+ if stream.last_token == stream.upto_token:
+ continue
+
+ logger.debug(
+ "Getting stream: %s: %s -> %s",
+ stream.NAME, stream.last_token, stream.upto_token
+ )
+ updates, current_token = yield stream.get_updates()
+
+ logger.debug(
+ "Sending %d updates to %d connections",
+ len(updates), len(self.connections),
+ )
+
+ if updates:
+ logger.info(
+ "Streaming: %s -> %s", stream.NAME, updates[-1][0]
+ )
+ stream_updates_counter.inc_by(len(updates), stream.NAME)
+
+ # Some streams return multiple rows with the same stream IDs,
+ # we need to make sure they get sent out in batches. We do
+ # this by setting the current token to all but the last of
+ # a series of updates with the same token to have a None
+ # token. See RdataCommand for more details.
+ batched_updates = _batch_updates(updates)
+
+ for conn in self.connections:
+ for token, row in batched_updates:
+ try:
+ conn.stream_update(stream.NAME, token, row)
+ except Exception:
+ logger.exception("Failed to replicate")
+
+ logger.debug("No more pending updates, breaking poke loop")
+ finally:
+ self.pending_updates = False
+ self.is_looping = False
+
+ @measure_func("repl.get_stream_updates")
+ def get_stream_updates(self, stream_name, token):
+ """For a given stream get all updates since token. This is called when
+ a client first subscribes to a stream.
+ """
+ stream = self.streams_by_name.get(stream_name, None)
+ if not stream:
+ raise Exception("unknown stream %s", stream_name)
+
+ return stream.get_updates_since(token)
+
+ @measure_func("repl.federation_ack")
+ def federation_ack(self, token):
+ """We've received an ack for federation stream from a client.
+ """
+ federation_ack_counter.inc()
+ if self.federation_sender:
+ self.federation_sender.federation_ack(token)
+
+ @measure_func("repl.on_user_sync")
+ def on_user_sync(self, conn_id, user_id, is_syncing, last_sync_ms):
+ """A client has started/stopped syncing on a worker.
+ """
+ user_sync_counter.inc()
+ self.presence_handler.update_external_syncs_row(
+ conn_id, user_id, is_syncing, last_sync_ms,
+ )
+
+ @measure_func("repl.on_remove_pusher")
+ @defer.inlineCallbacks
+ def on_remove_pusher(self, app_id, push_key, user_id):
+ """A client has asked us to remove a pusher
+ """
+ remove_pusher_counter.inc()
+ yield self.store.delete_pusher_by_app_id_pushkey_user_id(
+ app_id=app_id, pushkey=push_key, user_id=user_id
+ )
+
+ self.notifier.on_new_replication_data()
+
+ @measure_func("repl.on_invalidate_cache")
+ def on_invalidate_cache(self, cache_func, keys):
+ """The client has asked us to invalidate a cache
+ """
+ invalidate_cache_counter.inc()
+ getattr(self.store, cache_func).invalidate(tuple(keys))
+
+ def send_sync_to_all_connections(self, data):
+ """Sends a SYNC command to all clients.
+
+ Used in tests.
+ """
+ for conn in self.connections:
+ conn.send_sync(data)
+
+ def new_connection(self, connection):
+ """A new client connection has been established
+ """
+ self.connections.append(connection)
+
+ def lost_connection(self, connection):
+ """A client connection has been lost
+ """
+ try:
+ self.connections.remove(connection)
+ except ValueError:
+ pass
+
+ # We need to tell the presence handler that the connection has been
+ # lost so that it can handle any ongoing syncs on that connection.
+ self.presence_handler.update_external_syncs_clear(connection.conn_id)
+
+
+def _batch_updates(updates):
+ """Takes a list of updates of form [(token, row)] and sets the token to
+ None for all rows where the next row has the same token. This is used to
+ implement batching.
+
+ For example:
+
+ [(1, _), (1, _), (2, _), (3, _), (3, _)]
+
+ becomes:
+
+ [(None, _), (1, _), (2, _), (None, _), (3, _)]
+ """
+ if not updates:
+ return []
+
+ new_updates = []
+ for i, update in enumerate(updates[:-1]):
+ if update[0] == updates[i + 1][0]:
+ new_updates.append((None, update[1]))
+ else:
+ new_updates.append(update)
+
+ new_updates.append(updates[-1])
+ return new_updates
diff --git a/synapse/replication/tcp/streams.py b/synapse/replication/tcp/streams.py
new file mode 100644
index 0000000000..369d5f2428
--- /dev/null
+++ b/synapse/replication/tcp/streams.py
@@ -0,0 +1,464 @@
+# -*- coding: utf-8 -*-
+# Copyright 2017 Vector Creations Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Defines all the valid streams that clients can subscribe to, and the format
+of the rows returned by each stream.
+
+Each stream is defined by the following information:
+
+ stream name: The name of the stream
+ row type: The type that is used to serialise/deserialse the row
+ current_token: The function that returns the current token for the stream
+ update_function: The function that returns a list of updates between two tokens
+"""
+
+from twisted.internet import defer
+from collections import namedtuple
+
+import logging
+
+
+logger = logging.getLogger(__name__)
+
+
+MAX_EVENTS_BEHIND = 10000
+
+
+EventStreamRow = namedtuple("EventStreamRow", (
+ "event_id", # str
+ "room_id", # str
+ "type", # str
+ "state_key", # str, optional
+ "redacts", # str, optional
+))
+BackfillStreamRow = namedtuple("BackfillStreamRow", (
+ "event_id", # str
+ "room_id", # str
+ "type", # str
+ "state_key", # str, optional
+ "redacts", # str, optional
+))
+PresenceStreamRow = namedtuple("PresenceStreamRow", (
+ "user_id", # str
+ "state", # str
+ "last_active_ts", # int
+ "last_federation_update_ts", # int
+ "last_user_sync_ts", # int
+ "status_msg", # str
+ "currently_active", # bool
+))
+TypingStreamRow = namedtuple("TypingStreamRow", (
+ "room_id", # str
+ "user_ids", # list(str)
+))
+ReceiptsStreamRow = namedtuple("ReceiptsStreamRow", (
+ "room_id", # str
+ "receipt_type", # str
+ "user_id", # str
+ "event_id", # str
+ "data", # dict
+))
+PushRulesStreamRow = namedtuple("PushRulesStreamRow", (
+ "user_id", # str
+))
+PushersStreamRow = namedtuple("PushersStreamRow", (
+ "user_id", # str
+ "app_id", # str
+ "pushkey", # str
+ "deleted", # bool
+))
+CachesStreamRow = namedtuple("CachesStreamRow", (
+ "cache_func", # str
+ "keys", # list(str)
+ "invalidation_ts", # int
+))
+PublicRoomsStreamRow = namedtuple("PublicRoomsStreamRow", (
+ "room_id", # str
+ "visibility", # str
+ "appservice_id", # str, optional
+ "network_id", # str, optional
+))
+DeviceListsStreamRow = namedtuple("DeviceListsStreamRow", (
+ "user_id", # str
+ "destination", # str
+))
+ToDeviceStreamRow = namedtuple("ToDeviceStreamRow", (
+ "entity", # str
+))
+FederationStreamRow = namedtuple("FederationStreamRow", (
+ "type", # str, the type of data as defined in the BaseFederationRows
+ "data", # dict, serialization of a federation.send_queue.BaseFederationRow
+))
+TagAccountDataStreamRow = namedtuple("TagAccountDataStreamRow", (
+ "user_id", # str
+ "room_id", # str
+ "data", # dict
+))
+AccountDataStreamRow = namedtuple("AccountDataStream", (
+ "user_id", # str
+ "room_id", # str
+ "data_type", # str
+ "data", # dict
+))
+
+
+class Stream(object):
+ """Base class for the streams.
+
+ Provides a `get_updates()` function that returns new updates since the last
+ time it was called up until the point `advance_current_token` was called.
+ """
+ NAME = None # The name of the stream
+ ROW_TYPE = None # The type of the row
+ _LIMITED = True # Whether the update function takes a limit
+
+ def __init__(self, hs):
+ # The token from which we last asked for updates
+ self.last_token = self.current_token()
+
+ # The token that we will get updates up to
+ self.upto_token = self.current_token()
+
+ def advance_current_token(self):
+ """Updates `upto_token` to "now", which updates up until which point
+ get_updates[_since] will fetch rows till.
+ """
+ self.upto_token = self.current_token()
+
+ def discard_updates_and_advance(self):
+ """Called when the stream should advance but the updates would be discarded,
+ e.g. when there are no currently connected workers.
+ """
+ self.upto_token = self.current_token()
+ self.last_token = self.upto_token
+
+ @defer.inlineCallbacks
+ def get_updates(self):
+ """Gets all updates since the last time this function was called (or
+ since the stream was constructed if it hadn't been called before),
+ until the `upto_token`
+
+ Returns:
+ (list(ROW_TYPE), int): list of updates plus the token used as an
+ upper bound of the updates (i.e. the "current token")
+ """
+ updates, current_token = yield self.get_updates_since(self.last_token)
+ self.last_token = current_token
+
+ defer.returnValue((updates, current_token))
+
+ @defer.inlineCallbacks
+ def get_updates_since(self, from_token):
+ """Like get_updates except allows specifying from when we should
+ stream updates
+
+ Returns:
+ (list(ROW_TYPE), int): list of updates plus the token used as an
+ upper bound of the updates (i.e. the "current token")
+ """
+ if from_token in ("NOW", "now"):
+ defer.returnValue(([], self.upto_token))
+
+ current_token = self.upto_token
+
+ from_token = int(from_token)
+
+ if from_token == current_token:
+ defer.returnValue(([], current_token))
+
+ if self._LIMITED:
+ rows = yield self.update_function(
+ from_token, current_token,
+ limit=MAX_EVENTS_BEHIND + 1,
+ )
+
+ if len(rows) >= MAX_EVENTS_BEHIND:
+ raise Exception("stream %s has fallen behined" % (self.NAME))
+ else:
+ rows = yield self.update_function(
+ from_token, current_token,
+ )
+
+ updates = [(row[0], self.ROW_TYPE(*row[1:])) for row in rows]
+
+ defer.returnValue((updates, current_token))
+
+ def current_token(self):
+ """Gets the current token of the underlying streams. Should be provided
+ by the sub classes
+
+ Returns:
+ int
+ """
+ raise NotImplementedError()
+
+ def update_function(self, from_token, current_token, limit=None):
+ """Get updates between from_token and to_token. If Stream._LIMITED is
+ True then limit is provided, otherwise it's not.
+
+ Returns:
+ Deferred(list(tuple)): the first entry in the tuple is the token for
+ that update, and the rest of the tuple gets used to construct
+ a ``ROW_TYPE`` instance
+ """
+ raise NotImplementedError()
+
+
+class EventsStream(Stream):
+ """We received a new event, or an event went from being an outlier to not
+ """
+ NAME = "events"
+ ROW_TYPE = EventStreamRow
+
+ def __init__(self, hs):
+ store = hs.get_datastore()
+ self.current_token = store.get_current_events_token
+ self.update_function = store.get_all_new_forward_event_rows
+
+ super(EventsStream, self).__init__(hs)
+
+
+class BackfillStream(Stream):
+ """We fetched some old events and either we had never seen that event before
+ or it went from being an outlier to not.
+ """
+ NAME = "backfill"
+ ROW_TYPE = BackfillStreamRow
+
+ def __init__(self, hs):
+ store = hs.get_datastore()
+ self.current_token = store.get_current_backfill_token
+ self.update_function = store.get_all_new_backfill_event_rows
+
+ super(BackfillStream, self).__init__(hs)
+
+
+class PresenceStream(Stream):
+ NAME = "presence"
+ _LIMITED = False
+ ROW_TYPE = PresenceStreamRow
+
+ def __init__(self, hs):
+ store = hs.get_datastore()
+ presence_handler = hs.get_presence_handler()
+
+ self.current_token = store.get_current_presence_token
+ self.update_function = presence_handler.get_all_presence_updates
+
+ super(PresenceStream, self).__init__(hs)
+
+
+class TypingStream(Stream):
+ NAME = "typing"
+ _LIMITED = False
+ ROW_TYPE = TypingStreamRow
+
+ def __init__(self, hs):
+ typing_handler = hs.get_typing_handler()
+
+ self.current_token = typing_handler.get_current_token
+ self.update_function = typing_handler.get_all_typing_updates
+
+ super(TypingStream, self).__init__(hs)
+
+
+class ReceiptsStream(Stream):
+ NAME = "receipts"
+ ROW_TYPE = ReceiptsStreamRow
+
+ def __init__(self, hs):
+ store = hs.get_datastore()
+
+ self.current_token = store.get_max_receipt_stream_id
+ self.update_function = store.get_all_updated_receipts
+
+ super(ReceiptsStream, self).__init__(hs)
+
+
+class PushRulesStream(Stream):
+ """A user has changed their push rules
+ """
+ NAME = "push_rules"
+ ROW_TYPE = PushRulesStreamRow
+
+ def __init__(self, hs):
+ self.store = hs.get_datastore()
+ super(PushRulesStream, self).__init__(hs)
+
+ def current_token(self):
+ push_rules_token, _ = self.store.get_push_rules_stream_token()
+ return push_rules_token
+
+ @defer.inlineCallbacks
+ def update_function(self, from_token, to_token, limit):
+ rows = yield self.store.get_all_push_rule_updates(from_token, to_token, limit)
+ defer.returnValue([(row[0], row[2]) for row in rows])
+
+
+class PushersStream(Stream):
+ """A user has added/changed/removed a pusher
+ """
+ NAME = "pushers"
+ ROW_TYPE = PushersStreamRow
+
+ def __init__(self, hs):
+ store = hs.get_datastore()
+
+ self.current_token = store.get_pushers_stream_token
+ self.update_function = store.get_all_updated_pushers_rows
+
+ super(PushersStream, self).__init__(hs)
+
+
+class CachesStream(Stream):
+ """A cache was invalidated on the master and no other stream would invalidate
+ the cache on the workers
+ """
+ NAME = "caches"
+ ROW_TYPE = CachesStreamRow
+
+ def __init__(self, hs):
+ store = hs.get_datastore()
+
+ self.current_token = store.get_cache_stream_token
+ self.update_function = store.get_all_updated_caches
+
+ super(CachesStream, self).__init__(hs)
+
+
+class PublicRoomsStream(Stream):
+ """The public rooms list changed
+ """
+ NAME = "public_rooms"
+ ROW_TYPE = PublicRoomsStreamRow
+
+ def __init__(self, hs):
+ store = hs.get_datastore()
+
+ self.current_token = store.get_current_public_room_stream_id
+ self.update_function = store.get_all_new_public_rooms
+
+ super(PublicRoomsStream, self).__init__(hs)
+
+
+class DeviceListsStream(Stream):
+ """Someone added/changed/removed a device
+ """
+ NAME = "device_lists"
+ _LIMITED = False
+ ROW_TYPE = DeviceListsStreamRow
+
+ def __init__(self, hs):
+ store = hs.get_datastore()
+
+ self.current_token = store.get_device_stream_token
+ self.update_function = store.get_all_device_list_changes_for_remotes
+
+ super(DeviceListsStream, self).__init__(hs)
+
+
+class ToDeviceStream(Stream):
+ """New to_device messages for a client
+ """
+ NAME = "to_device"
+ ROW_TYPE = ToDeviceStreamRow
+
+ def __init__(self, hs):
+ store = hs.get_datastore()
+
+ self.current_token = store.get_to_device_stream_token
+ self.update_function = store.get_all_new_device_messages
+
+ super(ToDeviceStream, self).__init__(hs)
+
+
+class FederationStream(Stream):
+ """Data to be sent over federation. Only available when master has federation
+ sending disabled.
+ """
+ NAME = "federation"
+ ROW_TYPE = FederationStreamRow
+
+ def __init__(self, hs):
+ federation_sender = hs.get_federation_sender()
+
+ self.current_token = federation_sender.get_current_token
+ self.update_function = federation_sender.get_replication_rows
+
+ super(FederationStream, self).__init__(hs)
+
+
+class TagAccountDataStream(Stream):
+ """Someone added/removed a tag for a room
+ """
+ NAME = "tag_account_data"
+ ROW_TYPE = TagAccountDataStreamRow
+
+ def __init__(self, hs):
+ store = hs.get_datastore()
+
+ self.current_token = store.get_max_account_data_stream_id
+ self.update_function = store.get_all_updated_tags
+
+ super(TagAccountDataStream, self).__init__(hs)
+
+
+class AccountDataStream(Stream):
+ """Global or per room account data was changed
+ """
+ NAME = "account_data"
+ ROW_TYPE = AccountDataStreamRow
+
+ def __init__(self, hs):
+ self.store = hs.get_datastore()
+
+ self.current_token = self.store.get_max_account_data_stream_id
+
+ super(AccountDataStream, self).__init__(hs)
+
+ @defer.inlineCallbacks
+ def update_function(self, from_token, to_token, limit):
+ global_results, room_results = yield self.store.get_all_updated_account_data(
+ from_token, from_token, to_token, limit
+ )
+
+ results = list(room_results)
+ results.extend(
+ (stream_id, user_id, None, account_data_type, content,)
+ for stream_id, user_id, account_data_type, content in global_results
+ )
+
+ defer.returnValue(results)
+
+
+STREAMS_MAP = {
+ stream.NAME: stream
+ for stream in (
+ EventsStream,
+ BackfillStream,
+ PresenceStream,
+ TypingStream,
+ ReceiptsStream,
+ PushRulesStream,
+ PushersStream,
+ CachesStream,
+ PublicRoomsStream,
+ DeviceListsStream,
+ ToDeviceStream,
+ FederationStream,
+ TagAccountDataStream,
+ AccountDataStream,
+ )
+}
|