diff options
author | Erik Johnston <erik@matrix.org> | 2016-11-23 11:31:53 +0000 |
---|---|---|
committer | Erik Johnston <erik@matrix.org> | 2016-11-23 11:31:53 +0000 |
commit | b69f76c1069ac4daa570102ab3dac0bf08e8dcc1 (patch) | |
tree | b3b53d30e85bc26946c988ccc478f130b6f1c452 | |
parent | Comment (diff) | |
parent | Merge pull request #1640 from matrix-org/kegan/sync-perf (diff) | |
download | synapse-b69f76c1069ac4daa570102ab3dac0bf08e8dcc1.tar.xz |
Merge branch 'develop' of github.com:matrix-org/synapse into erikj/split_out_fed_txn
-rw-r--r-- | CHANGES.rst | 28 | ||||
-rw-r--r-- | synapse/__init__.py | 2 | ||||
-rw-r--r-- | synapse/api/errors.py | 1 | ||||
-rw-r--r-- | synapse/api/filtering.py | 45 | ||||
-rw-r--r-- | synapse/events/utils.py | 102 | ||||
-rw-r--r-- | synapse/handlers/sync.py | 31 | ||||
-rw-r--r-- | synapse/rest/client/v2_alpha/register.py | 11 | ||||
-rw-r--r-- | synapse/rest/client/v2_alpha/sync.py | 23 | ||||
-rw-r--r-- | synapse/storage/filtering.py | 8 | ||||
-rw-r--r-- | tests/events/test_utils.py | 170 |
10 files changed, 396 insertions, 25 deletions
diff --git a/CHANGES.rst b/CHANGES.rst index 1ce58632b8..a1a0624674 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,31 @@ +Changes in synapse v0.18.4 (2016-11-22) +======================================= + +Bug fixes: + +* Add workaround for buggy clients that the fail to register (PR #1632) + + +Changes in synapse v0.18.4-rc1 (2016-11-14) +=========================================== + +Changes: + +* Various database efficiency improvements (PR #1188, #1192) +* Update default config to blacklist more internal IPs, thanks to Euan Kemp (PR + #1198) +* Allow specifying duration in minutes in config, thanks to Daniel Dent (PR + #1625) + + +Bug fixes: + +* Fix media repo to set CORs headers on responses (PR #1190) +* Fix registration to not error on non-ascii passwords (PR #1191) +* Fix create event code to limit the number of prev_events (PR #1615) +* Fix bug in transaction ID deduplication (PR #1624) + + Changes in synapse v0.18.3 (2016-11-08) ======================================= diff --git a/synapse/__init__.py b/synapse/__init__.py index d366b69dab..432567a110 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.18.3" +__version__ = "0.18.4" diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 0041646858..921c457738 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -39,6 +39,7 @@ class Codes(object): CAPTCHA_NEEDED = "M_CAPTCHA_NEEDED" CAPTCHA_INVALID = "M_CAPTCHA_INVALID" MISSING_PARAM = "M_MISSING_PARAM" + INVALID_PARAM = "M_INVALID_PARAM" TOO_LARGE = "M_TOO_LARGE" EXCLUSIVE = "M_EXCLUSIVE" THREEPID_AUTH_FAILED = "M_THREEPID_AUTH_FAILED" diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 3b3ef70750..fb291d7fb9 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -71,6 +71,21 @@ class Filtering(object): if key in user_filter_json["room"]: self._check_definition(user_filter_json["room"][key]) + if "event_fields" in user_filter_json: + if type(user_filter_json["event_fields"]) != list: + raise SynapseError(400, "event_fields must be a list of strings") + for field in user_filter_json["event_fields"]: + if not isinstance(field, basestring): + raise SynapseError(400, "Event field must be a string") + # Don't allow '\\' in event field filters. This makes matching + # events a lot easier as we can then use a negative lookbehind + # assertion to split '\.' If we allowed \\ then it would + # incorrectly split '\\.' See synapse.events.utils.serialize_event + if r'\\' in field: + raise SynapseError( + 400, r'The escape character \ cannot itself be escaped' + ) + def _check_definition_room_lists(self, definition): """Check that "rooms" and "not_rooms" are lists of room ids if they are present @@ -152,6 +167,7 @@ class FilterCollection(object): self.include_leave = filter_json.get("room", {}).get( "include_leave", False ) + self.event_fields = filter_json.get("event_fields", []) def __repr__(self): return "<FilterCollection %s>" % (json.dumps(self._filter_json),) @@ -186,6 +202,26 @@ class FilterCollection(object): def filter_room_account_data(self, events): return self._room_account_data.filter(self._room_filter.filter(events)) + def blocks_all_presence(self): + return ( + self._presence_filter.filters_all_types() or + self._presence_filter.filters_all_senders() + ) + + def blocks_all_room_ephemeral(self): + return ( + self._room_ephemeral_filter.filters_all_types() or + self._room_ephemeral_filter.filters_all_senders() or + self._room_ephemeral_filter.filters_all_rooms() + ) + + def blocks_all_room_timeline(self): + return ( + self._room_timeline_filter.filters_all_types() or + self._room_timeline_filter.filters_all_senders() or + self._room_timeline_filter.filters_all_rooms() + ) + class Filter(object): def __init__(self, filter_json): @@ -202,6 +238,15 @@ class Filter(object): self.contains_url = self.filter_json.get("contains_url", None) + def filters_all_types(self): + return "*" in self.not_types + + def filters_all_senders(self): + return "*" in self.not_senders + + def filters_all_rooms(self): + return "*" in self.not_rooms + def check(self, event): """Checks whether the filter matches the given event. diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 0e9fd902af..5bbaef8187 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -16,6 +16,17 @@ from synapse.api.constants import EventTypes from . import EventBase +from frozendict import frozendict + +import re + +# Split strings on "." but not "\." This uses a negative lookbehind assertion for '\' +# (?<!stuff) matches if the current position in the string is not preceded +# by a match for 'stuff'. +# TODO: This is fast, but fails to handle "foo\\.bar" which should be treated as +# the literal fields "foo\" and "bar" but will instead be treated as "foo\\.bar" +SPLIT_FIELD_REGEX = re.compile(r'(?<!\\)\.') + def prune_event(event): """ Returns a pruned version of the given event, which removes all keys we @@ -97,6 +108,83 @@ def prune_event(event): ) +def _copy_field(src, dst, field): + """Copy the field in 'src' to 'dst'. + + For example, if src={"foo":{"bar":5}} and dst={}, and field=["foo","bar"] + then dst={"foo":{"bar":5}}. + + Args: + src(dict): The dict to read from. + dst(dict): The dict to modify. + field(list<str>): List of keys to drill down to in 'src'. + """ + if len(field) == 0: # this should be impossible + return + if len(field) == 1: # common case e.g. 'origin_server_ts' + if field[0] in src: + dst[field[0]] = src[field[0]] + return + + # Else is a nested field e.g. 'content.body' + # Pop the last field as that's the key to move across and we need the + # parent dict in order to access the data. Drill down to the right dict. + key_to_move = field.pop(-1) + sub_dict = src + for sub_field in field: # e.g. sub_field => "content" + if sub_field in sub_dict and type(sub_dict[sub_field]) in [dict, frozendict]: + sub_dict = sub_dict[sub_field] + else: + return + + if key_to_move not in sub_dict: + return + + # Insert the key into the output dictionary, creating nested objects + # as required. We couldn't do this any earlier or else we'd need to delete + # the empty objects if the key didn't exist. + sub_out_dict = dst + for sub_field in field: + sub_out_dict = sub_out_dict.setdefault(sub_field, {}) + sub_out_dict[key_to_move] = sub_dict[key_to_move] + + +def only_fields(dictionary, fields): + """Return a new dict with only the fields in 'dictionary' which are present + in 'fields'. + + If there are no event fields specified then all fields are included. + The entries may include '.' charaters to indicate sub-fields. + So ['content.body'] will include the 'body' field of the 'content' object. + A literal '.' character in a field name may be escaped using a '\'. + + Args: + dictionary(dict): The dictionary to read from. + fields(list<str>): A list of fields to copy over. Only shallow refs are + taken. + Returns: + dict: A new dictionary with only the given fields. If fields was empty, + the same dictionary is returned. + """ + if len(fields) == 0: + return dictionary + + # for each field, convert it: + # ["content.body.thing\.with\.dots"] => [["content", "body", "thing\.with\.dots"]] + split_fields = [SPLIT_FIELD_REGEX.split(f) for f in fields] + + # for each element of the output array of arrays: + # remove escaping so we can use the right key names. + split_fields[:] = [ + [f.replace(r'\.', r'.') for f in field_array] for field_array in split_fields + ] + + output = {} + for field_array in split_fields: + _copy_field(dictionary, output, field_array) + return output + + def format_event_raw(d): return d @@ -137,7 +225,7 @@ def format_event_for_client_v2_without_room_id(d): def serialize_event(e, time_now_ms, as_client_event=True, event_format=format_event_for_client_v1, - token_id=None): + token_id=None, only_event_fields=None): # FIXME(erikj): To handle the case of presence events and the like if not isinstance(e, EventBase): return e @@ -164,6 +252,12 @@ def serialize_event(e, time_now_ms, as_client_event=True, d["unsigned"]["transaction_id"] = txn_id if as_client_event: - return event_format(d) - else: - return d + d = event_format(d) + + if only_event_fields: + if (not isinstance(only_event_fields, list) or + not all(isinstance(f, basestring) for f in only_event_fields)): + raise TypeError("only_event_fields must be a list of strings") + d = only_fields(d, only_event_fields) + + return d diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 1f910ff814..a86996689c 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -277,6 +277,7 @@ class SyncHandler(object): """ with Measure(self.clock, "load_filtered_recents"): timeline_limit = sync_config.filter_collection.timeline_limit() + block_all_timeline = sync_config.filter_collection.blocks_all_room_timeline() if recents is None or newly_joined_room or timeline_limit < len(recents): limited = True @@ -293,7 +294,7 @@ class SyncHandler(object): else: recents = [] - if not limited: + if not limited or block_all_timeline: defer.returnValue(TimelineBatch( events=recents, prev_batch=now_token, @@ -531,9 +532,14 @@ class SyncHandler(object): ) newly_joined_rooms, newly_joined_users = res - yield self._generate_sync_entry_for_presence( - sync_result_builder, newly_joined_rooms, newly_joined_users + block_all_presence_data = ( + since_token is None and + sync_config.filter_collection.blocks_all_presence() ) + if not block_all_presence_data: + yield self._generate_sync_entry_for_presence( + sync_result_builder, newly_joined_rooms, newly_joined_users + ) yield self._generate_sync_entry_for_to_device(sync_result_builder) @@ -709,13 +715,20 @@ class SyncHandler(object): `(newly_joined_rooms, newly_joined_users)` """ user_id = sync_result_builder.sync_config.user.to_string() - - now_token, ephemeral_by_room = yield self.ephemeral_by_room( - sync_result_builder.sync_config, - now_token=sync_result_builder.now_token, - since_token=sync_result_builder.since_token, + block_all_room_ephemeral = ( + sync_result_builder.since_token is None and + sync_result_builder.sync_config.filter_collection.blocks_all_room_ephemeral() ) - sync_result_builder.now_token = now_token + + if block_all_room_ephemeral: + ephemeral_by_room = {} + else: + now_token, ephemeral_by_room = yield self.ephemeral_by_room( + sync_result_builder.sync_config, + now_token=sync_result_builder.now_token, + since_token=sync_result_builder.since_token, + ) + sync_result_builder.now_token = now_token ignored_account_data = yield self.store.get_global_account_data_by_type_for_user( "m.ignored_user_list", user_id=user_id, diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 68d18a9b82..6cfb20866b 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -169,6 +169,17 @@ class RegisterRestServlet(RestServlet): guest_access_token = body.get("guest_access_token", None) + if ( + 'initial_device_display_name' in body and + 'password' not in body + ): + # ignore 'initial_device_display_name' if sent without + # a password to work around a client bug where it sent + # the 'initial_device_display_name' param alone, wiping out + # the original registration params + logger.warn("Ignoring initial_device_display_name without password") + del body['initial_device_display_name'] + session_id = self.auth_handler.get_session_id(body) registered_user_id = None if session_id: diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index 6fc63715aa..7199ec883a 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -162,7 +162,7 @@ class SyncRestServlet(RestServlet): time_now = self.clock.time_msec() joined = self.encode_joined( - sync_result.joined, time_now, requester.access_token_id + sync_result.joined, time_now, requester.access_token_id, filter.event_fields ) invited = self.encode_invited( @@ -170,7 +170,7 @@ class SyncRestServlet(RestServlet): ) archived = self.encode_archived( - sync_result.archived, time_now, requester.access_token_id + sync_result.archived, time_now, requester.access_token_id, filter.event_fields ) response_content = { @@ -197,7 +197,7 @@ class SyncRestServlet(RestServlet): formatted.append(event) return {"events": formatted} - def encode_joined(self, rooms, time_now, token_id): + def encode_joined(self, rooms, time_now, token_id, event_fields): """ Encode the joined rooms in a sync result @@ -208,7 +208,8 @@ class SyncRestServlet(RestServlet): calculations token_id(int): ID of the user's auth token - used for namespacing of transaction IDs - + event_fields(list<str>): List of event fields to include. If empty, + all fields will be returned. Returns: dict[str, dict[str, object]]: the joined rooms list, in our response format @@ -216,7 +217,7 @@ class SyncRestServlet(RestServlet): joined = {} for room in rooms: joined[room.room_id] = self.encode_room( - room, time_now, token_id + room, time_now, token_id, only_fields=event_fields ) return joined @@ -253,7 +254,7 @@ class SyncRestServlet(RestServlet): return invited - def encode_archived(self, rooms, time_now, token_id): + def encode_archived(self, rooms, time_now, token_id, event_fields): """ Encode the archived rooms in a sync result @@ -264,7 +265,8 @@ class SyncRestServlet(RestServlet): calculations token_id(int): ID of the user's auth token - used for namespacing of transaction IDs - + event_fields(list<str>): List of event fields to include. If empty, + all fields will be returned. Returns: dict[str, dict[str, object]]: The invited rooms list, in our response format @@ -272,13 +274,13 @@ class SyncRestServlet(RestServlet): joined = {} for room in rooms: joined[room.room_id] = self.encode_room( - room, time_now, token_id, joined=False + room, time_now, token_id, joined=False, only_fields=event_fields ) return joined @staticmethod - def encode_room(room, time_now, token_id, joined=True): + def encode_room(room, time_now, token_id, joined=True, only_fields=None): """ Args: room (JoinedSyncResult|ArchivedSyncResult): sync result for a @@ -289,7 +291,7 @@ class SyncRestServlet(RestServlet): of transaction IDs joined (bool): True if the user is joined to this room - will mean we handle ephemeral events - + only_fields(list<str>): Optional. The list of event fields to include. Returns: dict[str, object]: the room, encoded in our response format """ @@ -298,6 +300,7 @@ class SyncRestServlet(RestServlet): return serialize_event( event, time_now, token_id=token_id, event_format=format_event_for_client_v2_without_room_id, + only_event_fields=only_fields, ) state_dict = room.state diff --git a/synapse/storage/filtering.py b/synapse/storage/filtering.py index 5248736816..a2ccc66ea7 100644 --- a/synapse/storage/filtering.py +++ b/synapse/storage/filtering.py @@ -16,6 +16,7 @@ from twisted.internet import defer from ._base import SQLBaseStore +from synapse.api.errors import SynapseError, Codes from synapse.util.caches.descriptors import cachedInlineCallbacks import simplejson as json @@ -24,6 +25,13 @@ import simplejson as json class FilteringStore(SQLBaseStore): @cachedInlineCallbacks(num_args=2) def get_user_filter(self, user_localpart, filter_id): + # filter_id is BIGINT UNSIGNED, so if it isn't a number, fail + # with a coherent error message rather than 500 M_UNKNOWN. + try: + int(filter_id) + except ValueError: + raise SynapseError(400, "Invalid filter ID", Codes.INVALID_PARAM) + def_json = yield self._simple_select_one_onecol( table="user_filters", keyvalues={ diff --git a/tests/events/test_utils.py b/tests/events/test_utils.py index fb0953c4ec..29f068d1f1 100644 --- a/tests/events/test_utils.py +++ b/tests/events/test_utils.py @@ -17,7 +17,11 @@ from .. import unittest from synapse.events import FrozenEvent -from synapse.events.utils import prune_event +from synapse.events.utils import prune_event, serialize_event + + +def MockEvent(**kwargs): + return FrozenEvent(kwargs) class PruneEventTestCase(unittest.TestCase): @@ -114,3 +118,167 @@ class PruneEventTestCase(unittest.TestCase): 'unsigned': {}, } ) + + +class SerializeEventTestCase(unittest.TestCase): + + def serialize(self, ev, fields): + return serialize_event(ev, 1479807801915, only_event_fields=fields) + + def test_event_fields_works_with_keys(self): + self.assertEquals( + self.serialize( + MockEvent( + sender="@alice:localhost", + room_id="!foo:bar" + ), + ["room_id"] + ), + { + "room_id": "!foo:bar", + } + ) + + def test_event_fields_works_with_nested_keys(self): + self.assertEquals( + self.serialize( + MockEvent( + sender="@alice:localhost", + room_id="!foo:bar", + content={ + "body": "A message", + }, + ), + ["content.body"] + ), + { + "content": { + "body": "A message", + } + } + ) + + def test_event_fields_works_with_dot_keys(self): + self.assertEquals( + self.serialize( + MockEvent( + sender="@alice:localhost", + room_id="!foo:bar", + content={ + "key.with.dots": {}, + }, + ), + ["content.key\.with\.dots"] + ), + { + "content": { + "key.with.dots": {}, + } + } + ) + + def test_event_fields_works_with_nested_dot_keys(self): + self.assertEquals( + self.serialize( + MockEvent( + sender="@alice:localhost", + room_id="!foo:bar", + content={ + "not_me": 1, + "nested.dot.key": { + "leaf.key": 42, + "not_me_either": 1, + }, + }, + ), + ["content.nested\.dot\.key.leaf\.key"] + ), + { + "content": { + "nested.dot.key": { + "leaf.key": 42, + }, + } + } + ) + + def test_event_fields_nops_with_unknown_keys(self): + self.assertEquals( + self.serialize( + MockEvent( + sender="@alice:localhost", + room_id="!foo:bar", + content={ + "foo": "bar", + }, + ), + ["content.foo", "content.notexists"] + ), + { + "content": { + "foo": "bar", + } + } + ) + + def test_event_fields_nops_with_non_dict_keys(self): + self.assertEquals( + self.serialize( + MockEvent( + sender="@alice:localhost", + room_id="!foo:bar", + content={ + "foo": ["I", "am", "an", "array"], + }, + ), + ["content.foo.am"] + ), + {} + ) + + def test_event_fields_nops_with_array_keys(self): + self.assertEquals( + self.serialize( + MockEvent( + sender="@alice:localhost", + room_id="!foo:bar", + content={ + "foo": ["I", "am", "an", "array"], + }, + ), + ["content.foo.1"] + ), + {} + ) + + def test_event_fields_all_fields_if_empty(self): + self.assertEquals( + self.serialize( + MockEvent( + room_id="!foo:bar", + content={ + "foo": "bar", + }, + ), + [] + ), + { + "room_id": "!foo:bar", + "content": { + "foo": "bar", + }, + "unsigned": {} + } + ) + + def test_event_fields_fail_if_fields_not_str(self): + with self.assertRaises(TypeError): + self.serialize( + MockEvent( + room_id="!foo:bar", + content={ + "foo": "bar", + }, + ), + ["room_id", 4] + ) |