From a99e9335502df3389ff6f16ef52c43ce391b6955 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 5 Sep 2016 09:34:24 +0100
Subject: Add upgrade script that will slowly prune state_groups_state entries

---
 synapse/replication/slave/storage/events.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'synapse/replication')

diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py
index cbebd5b2f7..15c52774a2 100644
--- a/synapse/replication/slave/storage/events.py
+++ b/synapse/replication/slave/storage/events.py
@@ -86,6 +86,9 @@ class SlavedEventStore(BaseSlavedStore):
     _get_state_groups_from_groups = (
         StateStore.__dict__["_get_state_groups_from_groups"]
     )
+    _get_state_groups_from_groups_txn = (
+        DataStore._get_state_groups_from_groups_txn.__func__
+    )
     _get_state_group_from_group = (
         StateStore.__dict__["_get_state_group_from_group"]
     )
--
cgit 1.5.1

From 2a0159b8aeaf8dce808345e2266c6d3301fa055a Mon Sep 17 00:00:00 2001
From: Mark Haines
Date: Wed, 7 Sep 2016 15:58:00 +0100
Subject: Fix the stream change cache to work over replication

---
 synapse/replication/slave/storage/deviceinbox.py | 11 +++++++++++
 synapse/storage/__init__.py                      |  2 +-
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'synapse/replication')

diff --git a/synapse/replication/slave/storage/deviceinbox.py b/synapse/replication/slave/storage/deviceinbox.py
index 64d8eb2af1..251078ba57 100644
--- a/synapse/replication/slave/storage/deviceinbox.py
+++ b/synapse/replication/slave/storage/deviceinbox.py
@@ -16,6 +16,7 @@
 from ._base import BaseSlavedStore
 from ._slaved_id_tracker import SlavedIdTracker
 from synapse.storage import DataStore
+from synapse.util.caches.stream_change_cache import StreamChangeCache
 
 
 class SlavedDeviceInboxStore(BaseSlavedStore):
@@ -24,6 +25,10 @@ class SlavedDeviceInboxStore(BaseSlavedStore):
         self._device_inbox_id_gen = SlavedIdTracker(
             db_conn, "device_inbox", "stream_id",
         )
+        self._device_inbox_stream_cache = StreamChangeCache(
+            "DeviceInboxStreamChangeCache",
+            self._device_inbox_id_gen.get_current_token()
+        )
 
     get_to_device_stream_token = DataStore.get_to_device_stream_token.__func__
     get_new_messages_for_device = DataStore.get_new_messages_for_device.__func__
@@ -38,5 +43,11 @@ class SlavedDeviceInboxStore(BaseSlavedStore):
         stream = result.get("to_device")
         if stream:
             self._device_inbox_id_gen.advance(int(stream["position"]))
+            for row in stream["rows"]:
+                stream_id = row[0]
+                user_id = row[1]
+                self._device_inbox_stream_cache.entity_has_changed(
+                    user_id, stream_id
+                )
 
         return super(SlavedDeviceInboxStore, self).process_replication(result)
diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py
index 6965daddc5..828e5ca60b 100644
--- a/synapse/storage/__init__.py
+++ b/synapse/storage/__init__.py
@@ -202,7 +202,7 @@ class DataStore(RoomMemberStore, RoomStore,
             max_value=max_device_inbox_id,
         )
         self._device_federation_outbox_stream_cache = StreamChangeCache(
-            "DeviceInboxStreamChangeCache", min_device_outbox_id,
+            "DeviceFederationOutboxStreamChangeCache", min_device_outbox_id,
             prefilled_cache=device_outbox_prefill,
         )
--
cgit 1.5.1
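
A note on the StreamChangeCache the patch above wires into the worker: the slave store now keeps its own cache for the device inbox and feeds it from the replicated rows in process_replication, so the DataStore methods it borrows have an up-to-date view to consult. The following standalone sketch is an editor's simplification (not the real synapse.util.caches.stream_change_cache, which is size-bounded and supports prefilling) showing the contract the code above relies on:

    class SimpleStreamChangeCache(object):
        """Tracks the latest stream position at which each entity changed."""

        def __init__(self, name, current_stream_pos):
            self.name = name
            # Positions below this are unknown to the cache.
            self._earliest_known_stream_pos = current_stream_pos
            self._entity_to_pos = {}

        def entity_has_changed(self, entity, stream_pos):
            # Fed by process_replication() as rows stream in from the master.
            if stream_pos > self._entity_to_pos.get(entity, 0):
                self._entity_to_pos[entity] = stream_pos

        def has_entity_changed(self, entity, stream_pos):
            # True means "may have changed; the caller must hit the database".
            if stream_pos < self._earliest_known_stream_pos:
                return True
            last_change = self._entity_to_pos.get(entity)
            return last_change is not None and last_change > stream_pos

The replay loop added to process_replication above is what keeps this view current on the worker; without it the borrowed DataStore code would have no cache (or a stale one) to ask before going to the database.
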
From 6a6cbfcf1e12c0f34a280764f892eaa23e720d57 Mon Sep 17 00:00:00 2001
From: Mark Haines
Date: Fri, 9 Sep 2016 11:48:23 +0100
Subject: Track the max_stream_device_id in a separate table, since we delete from the inbox table

---
 synapse/replication/slave/storage/deviceinbox.py     |  2 +-
 synapse/storage/__init__.py                          |  2 +-
 synapse/storage/deviceinbox.py                       | 17 +++++++++++++++--
 synapse/storage/schema/delta/35/device_stream_id.sql | 20 ++++++++++++++++++++
 4 files changed, 37 insertions(+), 4 deletions(-)
 create mode 100644 synapse/storage/schema/delta/35/device_stream_id.sql

(limited to 'synapse/replication')

diff --git a/synapse/replication/slave/storage/deviceinbox.py b/synapse/replication/slave/storage/deviceinbox.py
index 251078ba57..3bfd5e8213 100644
--- a/synapse/replication/slave/storage/deviceinbox.py
+++ b/synapse/replication/slave/storage/deviceinbox.py
@@ -23,7 +23,7 @@ class SlavedDeviceInboxStore(BaseSlavedStore):
     def __init__(self, db_conn, hs):
         super(SlavedDeviceInboxStore, self).__init__(db_conn, hs)
         self._device_inbox_id_gen = SlavedIdTracker(
-            db_conn, "device_inbox", "stream_id",
+            db_conn, "device_max_stream_id", "stream_id",
         )
         self._device_inbox_stream_cache = StreamChangeCache(
             "DeviceInboxStreamChangeCache",
diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py
index 828e5ca60b..a61e83d5de 100644
--- a/synapse/storage/__init__.py
+++ b/synapse/storage/__init__.py
@@ -111,7 +111,7 @@ class DataStore(RoomMemberStore, RoomStore,
             db_conn, "presence_stream", "stream_id"
         )
         self._device_inbox_id_gen = StreamIdGenerator(
-            db_conn, "device_inbox", "stream_id"
+            db_conn, "device_max_stream_id", "stream_id"
         )
         self._transaction_id_gen = IdGenerator(db_conn, "sent_transactions", "id")
diff --git a/synapse/storage/deviceinbox.py b/synapse/storage/deviceinbox.py
index 658fbef27b..b729b7106e 100644
--- a/synapse/storage/deviceinbox.py
+++ b/synapse/storage/deviceinbox.py
@@ -130,6 +130,13 @@ class DeviceInboxStore(SQLBaseStore):
 
     def _add_messages_to_local_device_inbox_txn(self, txn, stream_id,
                                                 messages_by_user_then_device):
+        sql = (
+            "UPDATE device_max_stream_id"
+            " SET stream_id = ?"
+            " WHERE stream_id < ?"
+        )
+        txn.execute(sql, (stream_id, stream_id))
+
         local_by_user_then_device = {}
         for user_id, messages_by_device in messages_by_user_then_device.items():
             messages_json_for_user = {}
@@ -148,6 +155,8 @@ class DeviceInboxStore(SQLBaseStore):
                     device = row[0]
                     messages_json_for_user[device] = message_json
             else:
+                if not devices:
+                    continue
                 sql = (
                     "SELECT device_id FROM devices"
                     " WHERE user_id = ? AND device_id IN ("
@@ -164,7 +173,11 @@ class DeviceInboxStore(SQLBaseStore):
                     message_json = ujson.dumps(messages_by_device[device])
                     messages_json_for_user[device] = message_json
 
-            local_by_user_then_device[user_id] = messages_json_for_user
+            if messages_json_for_user:
+                local_by_user_then_device[user_id] = messages_json_for_user
+
+        if not local_by_user_then_device:
+            return
 
         sql = (
             "INSERT INTO device_inbox"
@@ -301,7 +314,7 @@ class DeviceInboxStore(SQLBaseStore):
         has_changed = self._device_federation_outbox_stream_cache.has_entity_changed(
             destination, last_stream_id
         )
-        if not has_changed:
+        if not has_changed or last_stream_id == current_stream_id:
            return defer.succeed(([], current_stream_id))
 
        def get_new_messages_for_remote_destination_txn(txn):
diff --git a/synapse/storage/schema/delta/35/device_stream_id.sql b/synapse/storage/schema/delta/35/device_stream_id.sql
new file mode 100644
index 0000000000..1ce6336f33
--- /dev/null
+++ b/synapse/storage/schema/delta/35/device_stream_id.sql
@@ -0,0 +1,20 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE device_max_stream_id (
+    stream_id BIGINT NOT NULL
+);
+
+INSERT INTO device_max_stream_id (stream_id) VALUES (0);
--
cgit 1.5.1
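
The reason for the new single-row table: the stream position used to be derived from the device_inbox table, but rows are deleted from that table once messages are delivered (as the commit subject says), so the derived maximum could move backwards. The guarded UPDATE added to _add_messages_to_local_device_inbox_txn only ever ratchets the recorded value forward. A small sqlite3 sketch of just that pattern, for illustration only (Synapse itself runs this through its own transaction helpers):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE device_max_stream_id (stream_id BIGINT NOT NULL)")
    conn.execute("INSERT INTO device_max_stream_id (stream_id) VALUES (0)")

    def advance_max_stream_id(conn, stream_id):
        # Same shape as the UPDATE in the patch: it only moves the value
        # forward, even if updates arrive out of order.
        conn.execute(
            "UPDATE device_max_stream_id SET stream_id = ? WHERE stream_id < ?",
            (stream_id, stream_id),
        )

    advance_max_stream_id(conn, 5)
    advance_max_stream_id(conn, 3)  # a late, lower id leaves the maximum alone
    print(conn.execute("SELECT stream_id FROM device_max_stream_id").fetchone())  # (5,)
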
From ab80d5e0a968beb48140534b9ceab62b285b35c9 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 9 Sep 2016 14:05:01 +0100
Subject: Drop replication log levels

---
 synapse/federation/transaction_queue.py | 1 -
 synapse/replication/resource.py         | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'synapse/replication')

diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py
index d9b8b3fc1d..1ac569b305 100644
--- a/synapse/federation/transaction_queue.py
+++ b/synapse/federation/transaction_queue.py
@@ -229,7 +229,6 @@ class TransactionQueue(object):
                     "dropping transaction for now",
                     destination,
                 )
-                success = False
             finally:
                 # We want to be *very* sure we delete this after we stop processing
                 self.pending_transactions.pop(destination, None)
diff --git a/synapse/replication/resource.py b/synapse/replication/resource.py
index 1ed9034bcb..857bc9795c 100644
--- a/synapse/replication/resource.py
+++ b/synapse/replication/resource.py
@@ -181,7 +181,7 @@ class ReplicationResource(Resource):
     def replicate(self, request_streams, limit):
         writer = _Writer()
         current_token = yield self.current_replication_token()
-        logger.info("Replicating up to %r", current_token)
+        logger.debug("Replicating up to %r", current_token)
 
         yield self.account_data(writer, current_token, limit, request_streams)
         yield self.events(writer, current_token, limit, request_streams)
@@ -195,7 +195,7 @@ class ReplicationResource(Resource):
         yield self.to_device(writer, current_token, limit, request_streams)
         self.streams(writer, current_token, request_streams)
 
-        logger.info("Replicated %d rows", writer.total)
+        logger.debug("Replicated %d rows", writer.total)
         defer.returnValue(writer.finish())
 
     def streams(self, writer, current_token, request_streams):
--
cgit 1.5.1

From a4339de9de417394b170b608491183374e1c09bf Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 9 Sep 2016 16:44:26 +0100
Subject: Correctly handle typing stream id resetting

---
 synapse/replication/resource.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'synapse/replication')

diff --git a/synapse/replication/resource.py b/synapse/replication/resource.py
index 857bc9795c..299e9419a4 100644
--- a/synapse/replication/resource.py
+++ b/synapse/replication/resource.py
@@ -274,11 +274,18 @@ class ReplicationResource(Resource):
 
     @defer.inlineCallbacks
     def typing(self, writer, current_token, request_streams):
-        current_position = current_token.presence
+        current_position = current_token.typing
 
         request_typing = request_streams.get("typing")
 
         if request_typing is not None:
+            # If they have a higher token than current max, we can assume that
+            # they had been talking to a previous instance of the master. Since
+            # we reset the token on restart, the best (but hacky) thing we can
+            # do is to simply resend down all the typing notifications.
+            if request_typing > current_position:
+                request_typing = 0
+
             typing_rows = yield self.typing_handler.get_all_typing_updates(
                 request_typing, current_position
            )
--
cgit 1.5.1
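
Besides fixing current_position to read current_token.typing rather than current_token.presence, the hunk above encodes a recovery rule for master restarts: the typing stream is held in memory rather than the database, so its token starts again from zero when the master restarts, and a worker reconnecting with a token from the previous incarnation would otherwise never be sent anything. The same rule pulled out as a standalone helper (hypothetical name, purely for illustration):

    def clamp_request_token(request_token, current_position):
        """If the requested token is ahead of everything the master knows
        about, the master must have restarted: start from 0 so the worker is
        resent the full set of in-memory typing notifications."""
        if request_token > current_position:
            return 0
        return request_token

    assert clamp_request_token(10, 100) == 10  # normal case: resume where we left off
    assert clamp_request_token(100, 10) == 0   # master restarted: replay everything
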
From 211786ecd629588f2481c94217a4a388b090c993 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Thu, 15 Sep 2016 11:47:23 +0100
Subject: Stream public room changes down replication

---
 synapse/replication/resource.py             | 20 ++++++++++++++++++-
 synapse/replication/slave/storage/events.py |  8 ++++++++
 synapse/replication/slave/storage/room.py   | 31 +++++++++++++++++++++++++++++
 synapse/storage/room.py                     | 16 +++++++++++++++
 4 files changed, 74 insertions(+), 1 deletion(-)

(limited to 'synapse/replication')

diff --git a/synapse/replication/resource.py b/synapse/replication/resource.py
index 299e9419a4..9aab3ce23c 100644
--- a/synapse/replication/resource.py
+++ b/synapse/replication/resource.py
@@ -42,6 +42,7 @@ STREAM_NAMES = (
     ("pushers",),
     ("caches",),
     ("to_device",),
+    ("public_rooms",),
 )
 
 
@@ -131,6 +132,7 @@ class ReplicationResource(Resource):
         push_rules_token, room_stream_token = self.store.get_push_rules_stream_token()
         pushers_token = self.store.get_pushers_stream_token()
         caches_token = self.store.get_cache_stream_token()
+        public_rooms_token = self.store.get_current_public_room_stream_id()
 
         defer.returnValue(_ReplicationToken(
             room_stream_token,
@@ -144,6 +146,7 @@ class ReplicationResource(Resource):
             0, # State stream is no longer a thing
             caches_token,
             int(stream_token.to_device_key),
+            int(public_rooms_token),
         ))
 
     @request_handler()
@@ -193,6 +196,7 @@ class ReplicationResource(Resource):
         yield self.pushers(writer, current_token, limit, request_streams)
         yield self.caches(writer, current_token, limit, request_streams)
         yield self.to_device(writer, current_token, limit, request_streams)
+        yield self.public_rooms(writer, current_token, limit, request_streams)
         self.streams(writer, current_token, request_streams)
 
         logger.debug("Replicated %d rows", writer.total)
@@ -400,6 +404,20 @@ class ReplicationResource(Resource):
             "position", "user_id", "device_id", "message_json"
         ))
 
+    @defer.inlineCallbacks
+    def public_rooms(self, writer, current_token, limit, request_streams):
+        current_position = current_token.public_rooms
+
+        public_rooms = request_streams.get("public_rooms")
+
+        if public_rooms is not None:
+            public_rooms_rows = yield self.store.get_all_new_public_rooms(
+                public_rooms, current_position, limit
+            )
+            writer.write_header_and_rows("public_rooms", public_rooms_rows, (
+                "position", "room_id", "visibility"
+            ))
+
 
 class _Writer(object):
     """Writes the streams as a JSON object as the response to the request"""
@@ -428,7 +446,7 @@ class _Writer(object):
 
 class _ReplicationToken(collections.namedtuple("_ReplicationToken", (
     "events", "presence", "typing", "receipts", "account_data", "backfill",
-    "push_rules", "pushers", "state", "caches", "to_device",
+    "push_rules", "pushers", "state", "caches", "to_device", "public_rooms",
 ))):
     __slots__ = []
diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py
index 15c52774a2..f8965c73a0 100644
--- a/synapse/replication/slave/storage/events.py
+++ b/synapse/replication/slave/storage/events.py
@@ -61,6 +61,8 @@ class SlavedEventStore(BaseSlavedStore):
             "MembershipStreamChangeCache", events_max,
         )
 
+        self.stream_ordering_month_ago = 0
+
     # Cached functions can't be accessed through a class instance so we need
     # to reach inside the __dict__ to extract them.
     get_rooms_for_user = RoomMemberStore.__dict__["get_rooms_for_user"]
@@ -168,6 +170,12 @@ class SlavedEventStore(BaseSlavedStore):
     get_auth_chain_ids = DataStore.get_auth_chain_ids.__func__
     _get_auth_chain_ids_txn = DataStore._get_auth_chain_ids_txn.__func__
 
+    get_room_max_stream_ordering = DataStore.get_room_max_stream_ordering.__func__
+
+    get_forward_extremeties_for_room = (
+        DataStore.get_forward_extremeties_for_room.__func__
+    )
+
     def stream_positions(self):
         result = super(SlavedEventStore, self).stream_positions()
         result["events"] = self._stream_id_gen.get_current_token()
diff --git a/synapse/replication/slave/storage/room.py b/synapse/replication/slave/storage/room.py
index d5bb0f98ea..81743941dc 100644
--- a/synapse/replication/slave/storage/room.py
+++ b/synapse/replication/slave/storage/room.py
@@ -15,7 +15,38 @@
 
 from ._base import BaseSlavedStore
 from synapse.storage import DataStore
+from ._slaved_id_tracker import SlavedIdTracker
 
 
 class RoomStore(BaseSlavedStore):
+    def __init__(self, db_conn, hs):
+        super(RoomStore, self).__init__(db_conn, hs)
+        self._public_room_id_gen = SlavedIdTracker(
+            db_conn, "public_room_list_stream", "stream_id"
+        )
+
     get_public_room_ids = DataStore.get_public_room_ids.__func__
+    get_current_public_room_stream_id = (
+        DataStore.get_current_public_room_stream_id.__func__
+    )
+    get_public_room_ids_at_stream_id = (
+        DataStore.get_public_room_ids_at_stream_id.__func__
+    )
+    get_public_room_ids_at_stream_id_txn = (
+        DataStore.get_public_room_ids_at_stream_id_txn.__func__
+    )
+    get_published_at_stream_id_txn = (
+        DataStore.get_published_at_stream_id_txn.__func__
+    )
+
+    def stream_positions(self):
+        result = super(RoomStore, self).stream_positions()
+        result["public_rooms"] = self._public_room_id_gen.get_current_token()
+        return result
+
+    def process_replication(self, result):
+        stream = result.get("public_rooms")
+        if stream:
+            self._public_room_id_gen.advance(int(stream["position"]))
+
+        return super(RoomStore, self).process_replication(result)
diff --git a/synapse/storage/room.py b/synapse/storage/room.py
index 8aa4545939..2ef13d7403 100644
--- a/synapse/storage/room.py
+++ b/synapse/storage/room.py
@@ -307,3 +307,19 @@ class RoomStore(SQLBaseStore):
         return self.runInteraction(
             "get_public_room_changes", get_public_room_changes_txn
         )
+
+    def get_all_new_public_rooms(self, prev_id, current_id, limit):
+        def get_all_new_public_rooms(txn):
+            sql = ("""
+                SELECT stream_id, room_id, visibility FROM public_room_list_stream
+                WHERE stream_id > ? AND stream_id <= ?
+                ORDER BY stream_id ASC
+                LIMIT ?
+            """)
+            txn.execute(sql, (prev_id, current_id, limit,))
+            return txn.fetchall()
+
+        return self.runInteraction(
+            "get_all_new_public_rooms", get_all_new_public_rooms
+        )
--
cgit 1.5.1
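
This commit follows the same pattern as the existing streams: the master advertises a "public_rooms" position in its replication token and serves rows via get_all_new_public_rooms, while the worker's RoomStore tracks its position with a SlavedIdTracker and advances it in process_replication. A consumer pages through the stream using the (prev_id, current_id, limit) contract; roughly as below, where fetch_rows stands in for a call through the replication API and is not a real Synapse function:

    def drain_public_rooms(fetch_rows, from_token, upto_token, batch_size=100):
        """Yield (stream_id, room_id, visibility) rows between two tokens."""
        position = from_token
        while position < upto_token:
            # Rows come back ordered by stream_id ASC and capped at batch_size.
            rows = fetch_rows(position, upto_token, batch_size)
            if not rows:
                break
            for stream_id, room_id, visibility in rows:
                yield stream_id, room_id, visibility
            position = rows[-1][0]  # resume after the last stream id seen
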
+ """) + + txn.execute(sql, (prev_id, current_id, limit,)) + return txn.fetchall() + + return self.runInteraction( + "get_all_new_public_rooms", get_all_new_public_rooms + ) -- cgit 1.5.1 From 55e6fc917c5c6b93b200706b3ef24cb27d80ff93 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Sep 2016 14:04:28 +0100 Subject: Add cache to get_forward_extremeties_for_room --- synapse/replication/slave/storage/events.py | 2 +- synapse/storage/event_federation.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse/replication') diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index f8965c73a0..842ced02d6 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -173,7 +173,7 @@ class SlavedEventStore(BaseSlavedStore): get_room_max_stream_ordering = DataStore.get_room_max_stream_ordering.__func__ get_forward_extremeties_for_room = ( - DataStore.get_forward_extremeties_for_room.__func__ + EventFederationStore.__dict__["get_forward_extremeties_for_room"] ) def stream_positions(self): diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index ec6dbe5492..050b78d652 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -344,6 +344,7 @@ class EventFederationStore(SQLBaseStore): self.get_latest_event_ids_in_room.invalidate, (room_id,) ) + @cached(max_entries=5000, num_args=2) def get_forward_extremeties_for_room(self, room_id, stream_ordering): """For a given room_id and stream_ordering, return the forward extremeties of the room at that point in "time". -- cgit 1.5.1 From cb3edec6af55efb126f5e7ee66c4d895ef35a66e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Sep 2016 14:27:15 +0100 Subject: Use stream_change cache to make get_forward_extremeties_for_room cache more effective --- synapse/replication/slave/storage/events.py | 5 ++++- synapse/storage/event_federation.py | 11 ++++++++++- synapse/util/caches/stream_change_cache.py | 5 +++++ 3 files changed, 19 insertions(+), 2 deletions(-) (limited to 'synapse/replication') diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 842ced02d6..cc32c66792 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -173,7 +173,10 @@ class SlavedEventStore(BaseSlavedStore): get_room_max_stream_ordering = DataStore.get_room_max_stream_ordering.__func__ get_forward_extremeties_for_room = ( - EventFederationStore.__dict__["get_forward_extremeties_for_room"] + DataStore.get_forward_extremeties_for_room.__func__ + ) + _get_forward_extremeties_for_room = ( + EventFederationStore.__dict__["_get_forward_extremeties_for_room"] ) def stream_positions(self): diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 050b78d652..97d0c26475 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -344,8 +344,17 @@ class EventFederationStore(SQLBaseStore): self.get_latest_event_ids_in_room.invalidate, (room_id,) ) - @cached(max_entries=5000, num_args=2) def get_forward_extremeties_for_room(self, room_id, stream_ordering): + # We want to make the cache more effective, so we clamp to the last + # change before the given ordering. 
From 418bcd43093b10fc93487376e23973278850b502 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 16 Sep 2016 08:37:39 +0100
Subject: Add new storage function to slave store

---
 synapse/replication/slave/storage/events.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'synapse/replication')

diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py
index cc32c66792..0c26e96e98 100644
--- a/synapse/replication/slave/storage/events.py
+++ b/synapse/replication/slave/storage/events.py
@@ -62,6 +62,7 @@ class SlavedEventStore(BaseSlavedStore):
         )
 
         self.stream_ordering_month_ago = 0
+        self._stream_order_on_start = self.get_room_max_stream_ordering()
 
     # Cached functions can't be accessed through a class instance so we need
     # to reach inside the __dict__ to extract them.
--
cgit 1.5.1

From 995f2f032fd37273d976bc94a2b5b28b2e2abbbd Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 16 Sep 2016 14:48:21 +0100
Subject: Fix public room pagination for client_reader app

---
 synapse/replication/slave/storage/room.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'synapse/replication')

diff --git a/synapse/replication/slave/storage/room.py b/synapse/replication/slave/storage/room.py
index 81743941dc..23c613863f 100644
--- a/synapse/replication/slave/storage/room.py
+++ b/synapse/replication/slave/storage/room.py
@@ -38,6 +38,7 @@ class RoomStore(BaseSlavedStore):
     get_published_at_stream_id_txn = (
         DataStore.get_published_at_stream_id_txn.__func__
    )
+    get_public_room_changes = DataStore.get_public_room_changes.__func__
 
     def stream_positions(self):
         result = super(RoomStore, self).stream_positions()
--
cgit 1.5.1
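
A recurring idiom throughout this series is how the slaved stores borrow code from the master's store classes without inheriting from them: plain methods are taken as SomeStore.method.__func__ (on Python 2 these are unbound methods, so __func__ recovers the raw function), while @cached methods are descriptor objects that, as the comment in SlavedEventStore puts it, "can't be accessed through a class instance", so they are fished out of the class __dict__ before the descriptor fires. A toy illustration of the __dict__ extraction side of this (an editor's sketch, not Synapse code):

    class Upstream(object):
        def plain(self):
            return "plain result"

    class Worker(object):
        # Reuse Upstream.plain without inheriting from Upstream.
        plain = Upstream.__dict__["plain"]
        # On Python 2, Upstream.plain is an unbound method tied to Upstream,
        # hence the ".__func__" dance used in the patches above:
        #     plain = Upstream.plain.__func__

    print(Worker().plain())  # "plain result"
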