diff options
author | Erik Johnston <erik@matrix.org> | 2016-09-02 11:04:48 +0100 |
---|---|---|
committer | Erik Johnston <erik@matrix.org> | 2016-09-02 11:04:48 +0100 |
commit | 657847e4c6b955911768e7d3025ac1b245208882 (patch) | |
tree | a826411c63b4691a7356ce2740e7b4438b8ead78 | |
parent | Limit the length of state chains (diff) | |
parent | Merge pull request #1061 from matrix-org/erikj/linearize_resolution (diff) | |
download | synapse-657847e4c6b955911768e7d3025ac1b245208882.tar.xz |
Merge branch 'develop' of github.com:matrix-org/synapse into erikj/state_storage
-rw-r--r-- | README.rst | 6 | ||||
-rw-r--r-- | synapse/appservice/api.py | 12 | ||||
-rw-r--r-- | synapse/config/appservice.py | 17 | ||||
-rw-r--r-- | synapse/federation/federation_client.py | 10 | ||||
-rw-r--r-- | synapse/handlers/presence.py | 2 | ||||
-rw-r--r-- | synapse/push/mailer.py | 2 | ||||
-rw-r--r-- | synapse/replication/resource.py | 19 | ||||
-rw-r--r-- | synapse/replication/slave/storage/deviceinbox.py | 12 | ||||
-rw-r--r-- | synapse/rest/client/v2_alpha/sendtodevice.py | 10 | ||||
-rw-r--r-- | synapse/state.py | 144 | ||||
-rw-r--r-- | synapse/storage/deviceinbox.py | 44 | ||||
-rw-r--r-- | synapse/storage/receipts.py | 2 | ||||
-rw-r--r-- | synapse/storage/schema/delta/34/sent_txn_purge.py | 32 | ||||
-rw-r--r-- | synapse/storage/transactions.py | 2 |
14 files changed, 230 insertions, 84 deletions
diff --git a/README.rst b/README.rst index 9eebed784b..f1ccc8dc45 100644 --- a/README.rst +++ b/README.rst @@ -134,6 +134,12 @@ Installing prerequisites on Raspbian:: sudo pip install --upgrade ndg-httpsclient sudo pip install --upgrade virtualenv +Installing prerequisites on openSUSE:: + + sudo zypper in -t pattern devel_basis + sudo zypper in python-pip python-setuptools sqlite3 python-virtualenv \ + python-devel libffi-devel libopenssl-devel libjpeg62-devel + To install the synapse homeserver run:: virtualenv -p python2.7 ~/.synapse diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index 775417eb21..cc4af23962 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -67,6 +67,8 @@ class ApplicationServiceApi(SimpleHttpClient): @defer.inlineCallbacks def query_user(self, service, user_id): + if service.url is None: + defer.returnValue(False) uri = service.url + ("/users/%s" % urllib.quote(user_id)) response = None try: @@ -86,6 +88,8 @@ class ApplicationServiceApi(SimpleHttpClient): @defer.inlineCallbacks def query_alias(self, service, alias): + if service.url is None: + defer.returnValue(False) uri = service.url + ("/rooms/%s" % urllib.quote(alias)) response = None try: @@ -113,6 +117,8 @@ class ApplicationServiceApi(SimpleHttpClient): raise ValueError( "Unrecognised 'kind' argument %r to query_3pe()", kind ) + if service.url is None: + defer.returnValue([]) uri = "%s%s/thirdparty/%s/%s" % ( service.url, @@ -145,6 +151,9 @@ class ApplicationServiceApi(SimpleHttpClient): defer.returnValue([]) def get_3pe_protocol(self, service, protocol): + if service.url is None: + defer.returnValue({}) + @defer.inlineCallbacks def _get(): uri = "%s%s/thirdparty/protocol/%s" % ( @@ -166,6 +175,9 @@ class ApplicationServiceApi(SimpleHttpClient): @defer.inlineCallbacks def push_bulk(self, service, events, txn_id=None): + if service.url is None: + defer.returnValue(True) + events = self._serialize(events) if txn_id is None: diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py index dfe43b0b4c..d7537e8d44 100644 --- a/synapse/config/appservice.py +++ b/synapse/config/appservice.py @@ -86,7 +86,7 @@ def load_appservices(hostname, config_files): def _load_appservice(hostname, as_info, config_filename): required_string_fields = [ - "id", "url", "as_token", "hs_token", "sender_localpart" + "id", "as_token", "hs_token", "sender_localpart" ] for field in required_string_fields: if not isinstance(as_info.get(field), basestring): @@ -94,6 +94,14 @@ def _load_appservice(hostname, as_info, config_filename): field, config_filename, )) + # 'url' must either be a string or explicitly null, not missing + # to avoid accidentally turning off push for ASes. + if (not isinstance(as_info.get("url"), basestring) and + as_info.get("url", "") is not None): + raise KeyError( + "Required string field or explicit null: 'url' (%s)" % (config_filename,) + ) + localpart = as_info["sender_localpart"] if urllib.quote(localpart) != localpart: raise ValueError( @@ -132,6 +140,13 @@ def _load_appservice(hostname, as_info, config_filename): for p in protocols: if not isinstance(p, str): raise KeyError("Bad value for 'protocols' item") + + if as_info["url"] is None: + logger.info( + "(%s) Explicitly empty 'url' provided. This application service" + " will not receive events or queries.", + config_filename, + ) return ApplicationService( token=as_info["as_token"], url=as_info["url"], diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 67ad3dfd37..627acc6a4f 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -269,7 +269,7 @@ class FederationClient(FederationBase): pdu_attempts = self.pdu_destination_tried.setdefault(event_id, {}) - pdu = None + signed_pdu = None for destination in destinations: now = self._clock.time_msec() last_attempt = pdu_attempts.get(destination, 0) @@ -299,7 +299,7 @@ class FederationClient(FederationBase): pdu = pdu_list[0] # Check signatures are correct. - pdu = yield self._check_sigs_and_hashes([pdu])[0] + signed_pdu = yield self._check_sigs_and_hashes([pdu])[0] break @@ -322,10 +322,10 @@ class FederationClient(FederationBase): ) continue - if self._get_pdu_cache is not None and pdu: - self._get_pdu_cache[event_id] = pdu + if self._get_pdu_cache is not None and signed_pdu: + self._get_pdu_cache[event_id] = signed_pdu - defer.returnValue(pdu) + defer.returnValue(signed_pdu) @defer.inlineCallbacks @log_function diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index d22adadc38..cf82a2336e 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -230,7 +230,7 @@ class PresenceHandler(object): """ logger.info( "Performing _persist_unpersisted_changes. Persiting %d unpersisted changes", - len(self.user_to_current_state) + len(self.unpersisted_users_changes) ) unpersisted = self.unpersisted_users_changes diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index 81b1af4a19..2cafcfd8f5 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -338,7 +338,7 @@ class Mailer(object): # want the generated-from-names one here otherwise we'll # end up with, "new message from Bob in the Bob room" room_name = yield calculate_room_name( - state_by_room[room_id], user_id, fallback_to_members=False + self.store, state_by_room[room_id], user_id, fallback_to_members=False ) my_member_event = state_by_room[room_id][("m.room.member", user_id)] diff --git a/synapse/replication/resource.py b/synapse/replication/resource.py index 817f50428f..1ed9034bcb 100644 --- a/synapse/replication/resource.py +++ b/synapse/replication/resource.py @@ -41,6 +41,7 @@ STREAM_NAMES = ( ("push_rules",), ("pushers",), ("caches",), + ("to_device",), ) @@ -142,6 +143,7 @@ class ReplicationResource(Resource): pushers_token, 0, # State stream is no longer a thing caches_token, + int(stream_token.to_device_key), )) @request_handler() @@ -190,6 +192,7 @@ class ReplicationResource(Resource): yield self.push_rules(writer, current_token, limit, request_streams) yield self.pushers(writer, current_token, limit, request_streams) yield self.caches(writer, current_token, limit, request_streams) + yield self.to_device(writer, current_token, limit, request_streams) self.streams(writer, current_token, request_streams) logger.info("Replicated %d rows", writer.total) @@ -376,6 +379,20 @@ class ReplicationResource(Resource): "position", "cache_func", "keys", "invalidation_ts" )) + @defer.inlineCallbacks + def to_device(self, writer, current_token, limit, request_streams): + current_position = current_token.to_device + + to_device = request_streams.get("to_device") + + if to_device is not None: + to_device_rows = yield self.store.get_all_new_device_messages( + to_device, current_position, limit + ) + writer.write_header_and_rows("to_device", to_device_rows, ( + "position", "user_id", "device_id", "message_json" + )) + class _Writer(object): """Writes the streams as a JSON object as the response to the request""" @@ -404,7 +421,7 @@ class _Writer(object): class _ReplicationToken(collections.namedtuple("_ReplicationToken", ( "events", "presence", "typing", "receipts", "account_data", "backfill", - "push_rules", "pushers", "state", "caches", + "push_rules", "pushers", "state", "caches", "to_device", ))): __slots__ = [] diff --git a/synapse/replication/slave/storage/deviceinbox.py b/synapse/replication/slave/storage/deviceinbox.py index 7583d23708..64d8eb2af1 100644 --- a/synapse/replication/slave/storage/deviceinbox.py +++ b/synapse/replication/slave/storage/deviceinbox.py @@ -28,3 +28,15 @@ class SlavedDeviceInboxStore(BaseSlavedStore): get_to_device_stream_token = DataStore.get_to_device_stream_token.__func__ get_new_messages_for_device = DataStore.get_new_messages_for_device.__func__ delete_messages_for_device = DataStore.delete_messages_for_device.__func__ + + def stream_positions(self): + result = super(SlavedDeviceInboxStore, self).stream_positions() + result["to_device"] = self._device_inbox_id_gen.get_current_token() + return result + + def process_replication(self, result): + stream = result.get("to_device") + if stream: + self._device_inbox_id_gen.advance(int(stream["position"])) + + return super(SlavedDeviceInboxStore, self).process_replication(result) diff --git a/synapse/rest/client/v2_alpha/sendtodevice.py b/synapse/rest/client/v2_alpha/sendtodevice.py index 00533741af..7c0991ca55 100644 --- a/synapse/rest/client/v2_alpha/sendtodevice.py +++ b/synapse/rest/client/v2_alpha/sendtodevice.py @@ -40,6 +40,7 @@ class SendToDeviceRestServlet(servlet.RestServlet): self.hs = hs self.auth = hs.get_auth() self.store = hs.get_datastore() + self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id self.txns = HttpTransactionStore() @@ -71,9 +72,14 @@ class SendToDeviceRestServlet(servlet.RestServlet): } for device_id, message_content in by_device.items() } - local_messages[user_id] = messages_by_device + if messages_by_device: + local_messages[user_id] = messages_by_device - yield self.store.add_messages_to_device_inbox(local_messages) + stream_id = yield self.store.add_messages_to_device_inbox(local_messages) + + self.notifier.on_new_event( + "to_device", stream_id, users=local_messages.keys() + ) response = (200, {}) self.txns.store_client_transaction(request, txn_id, response) diff --git a/synapse/state.py b/synapse/state.py index cd428e83cd..4520fa0415 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -23,6 +23,7 @@ from synapse.api.constants import EventTypes from synapse.api.errors import AuthError from synapse.api.auth import AuthEventTypes from synapse.events.snapshot import EventContext +from synapse.util.async import Linearizer from collections import namedtuple @@ -87,6 +88,7 @@ class StateHandler(object): # dict of set of event_ids -> _StateCacheEntry. self._state_cache = None + self.resolve_linearizer = Linearizer() def start_caching(self): logger.debug("start_caching") @@ -297,85 +299,85 @@ class StateHandler(object): delta_ids={}, )) - if self._state_cache is not None: - cache = self._state_cache.get(group_names, None) - if cache: - defer.returnValue(cache) + with (yield self.resolve_linearizer.queue(group_names)): + if self._state_cache is not None: + cache = self._state_cache.get(group_names, None) + if cache: + defer.returnValue(cache) - logger.info( - "Resolving state for %s with %d groups", room_id, len(state_groups_ids) - ) - - state = {} - for st in state_groups_ids.values(): - for key, e_id in st.items(): - state.setdefault(key, set()).add(e_id) + logger.info( + "Resolving state for %s with %d groups", room_id, len(state_groups_ids) + ) - conflicted_state = { - k: list(v) - for k, v in state.items() - if len(v) > 1 - } + state = {} + for st in state_groups_ids.values(): + for key, e_id in st.items(): + state.setdefault(key, set()).add(e_id) - if conflicted_state: - logger.info("Resolving conflicted state for %r", room_id) - state_map = yield self.store.get_events( - [e_id for st in state_groups_ids.values() for e_id in st.values()], - get_prev_content=False - ) - state_sets = [ - [state_map[e_id] for key, e_id in st.items() if e_id in state_map] - for st in state_groups_ids.values() - ] - new_state, _ = self._resolve_events( - state_sets, event_type, state_key - ) - new_state = { - key: e.event_id for key, e in new_state.items() - } - else: - new_state = { - key: e_ids.pop() for key, e_ids in state.items() + conflicted_state = { + k: list(v) + for k, v in state.items() + if len(v) > 1 } - state_group = None - new_state_event_ids = frozenset(new_state.values()) - for sg, events in state_groups_ids.items(): - if new_state_event_ids == frozenset(e_id for e_id in events): - state_group = sg - break - - if state_group is None: - # Worker instances don't have access to this method, but we want - # to set the state_group on the main instance to increase cache - # hits. - if hasattr(self.store, "get_next_state_group"): - state_group = self.store.get_next_state_group() - - prev_group = None - delta_ids = None - for old_group, old_ids in state_groups_ids.items(): - if not set(new_state.iterkeys()) - set(old_ids.iterkeys()): - n_delta_ids = { - k: v - for k, v in new_state.items() - if old_ids.get(k) != v + if conflicted_state: + logger.info("Resolving conflicted state for %r", room_id) + state_map = yield self.store.get_events( + [e_id for st in state_groups_ids.values() for e_id in st.values()], + get_prev_content=False + ) + state_sets = [ + [state_map[e_id] for key, e_id in st.items() if e_id in state_map] + for st in state_groups_ids.values() + ] + new_state, _ = self._resolve_events( + state_sets, event_type, state_key + ) + new_state = { + key: e.event_id for key, e in new_state.items() } - if not delta_ids or len(n_delta_ids) < len(delta_ids): - prev_group = old_group - delta_ids = n_delta_ids - - cache = _StateCacheEntry( - state=new_state, - state_group=state_group, - prev_group=prev_group, - delta_ids=delta_ids, - ) + else: + new_state = { + key: e_ids.pop() for key, e_ids in state.items() + } + + state_group = None + new_state_event_ids = frozenset(new_state.values()) + for sg, events in state_groups_ids.items(): + if new_state_event_ids == frozenset(e_id for e_id in events): + state_group = sg + break + if state_group is None: + # Worker instances don't have access to this method, but we want + # to set the state_group on the main instance to increase cache + # hits. + if hasattr(self.store, "get_next_state_group"): + state_group = self.store.get_next_state_group() + + prev_group = None + delta_ids = None + for old_group, old_ids in state_groups_ids.items(): + if not set(new_state.iterkeys()) - set(old_ids.iterkeys()): + n_delta_ids = { + k: v + for k, v in new_state.items() + if old_ids.get(k) != v + } + if not delta_ids or len(n_delta_ids) < len(delta_ids): + prev_group = old_group + delta_ids = n_delta_ids + + cache = _StateCacheEntry( + state=new_state, + state_group=state_group, + prev_group=prev_group, + delta_ids=delta_ids, + ) - if self._state_cache is not None: - self._state_cache[group_names] = cache + if self._state_cache is not None: + self._state_cache[group_names] = cache - defer.returnValue(cache) + defer.returnValue(cache) def resolve_events(self, state_sets, event): logger.info( diff --git a/synapse/storage/deviceinbox.py b/synapse/storage/deviceinbox.py index e682fe1bd1..2fa0a218b9 100644 --- a/synapse/storage/deviceinbox.py +++ b/synapse/storage/deviceinbox.py @@ -33,7 +33,8 @@ class DeviceInboxStore(SQLBaseStore): messages_by_user_and_device(dict): Dictionary of user_id to device_id to message. Returns: - A deferred that resolves when the messages have been inserted. + A deferred stream_id that resolves when the messages have been + inserted. """ def select_devices_txn(txn, user_id, devices): @@ -81,6 +82,8 @@ class DeviceInboxStore(SQLBaseStore): stream_id ) + defer.returnValue(self._device_inbox_id_gen.get_current_token()) + def get_new_messages_for_device( self, user_id, device_id, current_stream_id, limit=100 ): @@ -136,5 +139,44 @@ class DeviceInboxStore(SQLBaseStore): "delete_messages_for_device", delete_messages_for_device_txn ) + def get_all_new_device_messages(self, last_pos, current_pos, limit): + """ + Args: + last_pos(int): + current_pos(int): + limit(int): + Returns: + A deferred list of rows from the device inbox + """ + if last_pos == current_pos: + return defer.succeed([]) + + def get_all_new_device_messages_txn(txn): + sql = ( + "SELECT stream_id FROM device_inbox" + " WHERE ? < stream_id AND stream_id <= ?" + " GROUP BY stream_id" + " ORDER BY stream_id ASC" + " LIMIT ?" + ) + txn.execute(sql, (last_pos, current_pos, limit)) + stream_ids = txn.fetchall() + if not stream_ids: + return [] + max_stream_id_in_limit = stream_ids[-1] + + sql = ( + "SELECT stream_id, user_id, device_id, message_json" + " FROM device_inbox" + " WHERE ? < stream_id AND stream_id <= ?" + " ORDER BY stream_id ASC" + ) + txn.execute(sql, (last_pos, max_stream_id_in_limit)) + return txn.fetchall() + + return self.runInteraction( + "get_all_new_device_messages", get_all_new_device_messages_txn + ) + def get_to_device_stream_token(self): return self._device_inbox_id_gen.get_current_token() diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index ccc3811e84..9747a04a9a 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -145,7 +145,7 @@ class ReceiptsStore(SQLBaseStore): defer.returnValue([ev for res in results.values() for ev in res]) - @cachedInlineCallbacks(num_args=3, max_entries=5000, tree=True) + @cachedInlineCallbacks(num_args=3, tree=True) def get_linearized_receipts_for_room(self, room_id, to_key, from_key=None): """Get receipts for a single room for sending to clients. diff --git a/synapse/storage/schema/delta/34/sent_txn_purge.py b/synapse/storage/schema/delta/34/sent_txn_purge.py new file mode 100644 index 0000000000..81948e3431 --- /dev/null +++ b/synapse/storage/schema/delta/34/sent_txn_purge.py @@ -0,0 +1,32 @@ +# Copyright 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.storage.engines import PostgresEngine + +import logging + +logger = logging.getLogger(__name__) + + +def run_create(cur, database_engine, *args, **kwargs): + if isinstance(database_engine, PostgresEngine): + cur.execute("TRUNCATE sent_transactions") + else: + cur.execute("DELETE FROM sent_transactions") + + cur.execute("CREATE INDEX sent_transactions_ts ON sent_transactions(ts)") + + +def run_upgrade(cur, database_engine, *args, **kwargs): + pass diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index 58d4de4f1d..1c588bd46b 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -387,8 +387,10 @@ class TransactionStore(SQLBaseStore): def _cleanup_transactions(self): now = self._clock.time_msec() month_ago = now - 30 * 24 * 60 * 60 * 1000 + six_hours_ago = now - 6 * 60 * 60 * 1000 def _cleanup_transactions_txn(txn): txn.execute("DELETE FROM received_transactions WHERE ts < ?", (month_ago,)) + txn.execute("DELETE FROM sent_transactions WHERE ts < ?", (six_hours_ago,)) return self.runInteraction("_persist_in_mem_txns", _cleanup_transactions_txn) |