From cecbd636e94f4e900ef6d246b62698ff1c8ee352 Mon Sep 17 00:00:00 2001 From: Daniel Wagner-Hall Date: Thu, 20 Aug 2015 16:21:35 +0100 Subject: /tokenrefresh POST endpoint This allows refresh tokens to be exchanged for (access_token, refresh_token). It also starts issuing them on login, though no clients currently interpret them. --- synapse/storage/schema/delta/23/refresh_tokens.sql | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 synapse/storage/schema/delta/23/refresh_tokens.sql (limited to 'synapse/storage/schema') diff --git a/synapse/storage/schema/delta/23/refresh_tokens.sql b/synapse/storage/schema/delta/23/refresh_tokens.sql new file mode 100644 index 0000000000..46839e016c --- /dev/null +++ b/synapse/storage/schema/delta/23/refresh_tokens.sql @@ -0,0 +1,21 @@ +/* Copyright 2015 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE TABLE IF NOT EXISTS refresh_tokens( + id INTEGER PRIMARY KEY AUTOINCREMENT, + token TEXT NOT NULL, + user_id TEXT NOT NULL, + UNIQUE (token) +); -- cgit 1.4.1 From 037481a033aeb80b86a2e7e074e29cfaf8c23ea8 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Mon, 24 Aug 2015 17:48:57 +0100 Subject: Remove autoincrement since we're incrementing the ID in the storage layer --- synapse/storage/schema/delta/23/refresh_tokens.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage/schema') diff --git a/synapse/storage/schema/delta/23/refresh_tokens.sql b/synapse/storage/schema/delta/23/refresh_tokens.sql index 46839e016c..437b1ac1be 100644 --- a/synapse/storage/schema/delta/23/refresh_tokens.sql +++ b/synapse/storage/schema/delta/23/refresh_tokens.sql @@ -14,7 +14,7 @@ */ CREATE TABLE IF NOT EXISTS refresh_tokens( - id INTEGER PRIMARY KEY AUTOINCREMENT, + id INTEGER PRIMARY KEY, token TEXT NOT NULL, user_id TEXT NOT NULL, UNIQUE (token) -- cgit 1.4.1 From dffc9c4ae0eea0616cc017c7f858f8a923202075 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 14 Sep 2015 14:41:37 +0100 Subject: Drop unused index --- synapse/storage/schema/delta/23/drop_state_index.sql | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 synapse/storage/schema/delta/23/drop_state_index.sql (limited to 'synapse/storage/schema') diff --git a/synapse/storage/schema/delta/23/drop_state_index.sql b/synapse/storage/schema/delta/23/drop_state_index.sql new file mode 100644 index 0000000000..07d0ea5cb2 --- /dev/null +++ b/synapse/storage/schema/delta/23/drop_state_index.sql @@ -0,0 +1,16 @@ +/* Copyright 2015 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +DROP INDEX IF EXISTS state_groups_state_tuple; -- cgit 1.4.1 From 7213588083dd9a721b0cd623fe22b308f25f19a5 Mon Sep 17 00:00:00 2001 From: Daniel Wagner-Hall Date: Tue, 22 Sep 2015 12:57:40 +0100 Subject: Implement configurable stats reporting SYN-287 This requires that HS owners either opt in or out of stats reporting. When --generate-config is passed, --report-stats must be specified. If an already-generated config is used and doesn't have the report_stats key, the user is asked to set it. --- synapse/app/homeserver.py | 35 ++++++- synapse/app/synctl.py | 12 ++- synapse/config/_base.py | 45 +++++++- synapse/config/appservice.py | 2 +- synapse/config/captcha.py | 2 +- synapse/config/database.py | 2 +- synapse/config/key.py | 2 +- synapse/config/logger.py | 2 +- synapse/config/metrics.py | 8 +- synapse/config/ratelimiting.py | 2 +- synapse/config/registration.py | 2 +- synapse/config/repository.py | 2 +- synapse/config/saml2.py | 2 +- synapse/config/server.py | 2 +- synapse/config/tls.py | 2 +- synapse/config/voip.py | 2 +- synapse/storage/__init__.py | 20 +++- synapse/storage/events.py | 58 ++++++++++- synapse/storage/registration.py | 12 +++ .../storage/schema/delta/24/stats_reporting.sql | 22 ++++ tests/storage/event_injector.py | 81 ++++++++++++++ tests/storage/test_events.py | 116 +++++++++++++++++++++ tests/storage/test_room.py | 2 +- tests/storage/test_stream.py | 68 +++--------- 24 files changed, 425 insertions(+), 78 deletions(-) create mode 100644 synapse/storage/schema/delta/24/stats_reporting.sql create mode 100644 tests/storage/event_injector.py create mode 100644 tests/storage/test_events.py (limited to 'synapse/storage/schema') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 15c0a4a003..b4429bd4f3 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -42,7 +42,7 @@ from synapse.storage import ( from synapse.server import HomeServer -from twisted.internet import reactor +from twisted.internet import reactor, task, defer from twisted.application import service from twisted.enterprise import adbapi from twisted.web.resource import Resource, EncodingResourceWrapper @@ -677,6 +677,39 @@ def run(hs): ThreadPool._worker = profile(ThreadPool._worker) reactor.run = profile(reactor.run) + start_time = hs.get_clock().time() + + @defer.inlineCallbacks + def phone_stats_home(): + now = int(hs.get_clock().time()) + uptime = int(now - start_time) + if uptime < 0: + uptime = 0 + + stats = {} + stats["homeserver"] = hs.config.server_name + stats["timestamp"] = now + stats["uptime_seconds"] = uptime + stats["total_users"] = yield hs.get_datastore().count_all_users() + + all_rooms = yield hs.get_datastore().get_rooms(False) + stats["total_room_count"] = len(all_rooms) + + stats["daily_active_users"] = yield hs.get_datastore().count_daily_users() + daily_messages = yield hs.get_datastore().count_daily_messages() + if daily_messages is not None: + stats["daily_messages"] = daily_messages + + logger.info("Reporting stats to matrix.org: %s" % (stats,)) + hs.get_simple_http_client().put_json( + "https://matrix.org/report-usage-stats/push", + stats + ) + + if hs.config.report_stats: + phone_home_task = task.LoopingCall(phone_stats_home) + phone_home_task.start(60 * 60 * 24, now=False) + def in_thread(): with LoggingContext("run"): change_resource_limit(hs.config.soft_file_limit) diff --git a/synapse/app/synctl.py b/synapse/app/synctl.py index
1f7d543c31..6bcc437591 100755 --- a/synapse/app/synctl.py +++ b/synapse/app/synctl.py @@ -25,6 +25,7 @@ SYNAPSE = ["python", "-B", "-m", "synapse.app.homeserver"] CONFIGFILE = "homeserver.yaml" GREEN = "\x1b[1;32m" +RED = "\x1b[1;31m" NORMAL = "\x1b[m" if not os.path.exists(CONFIGFILE): @@ -45,8 +46,15 @@ def start(): print "Starting ...", args = SYNAPSE args.extend(["--daemonize", "-c", CONFIGFILE]) - subprocess.check_call(args) - print GREEN + "started" + NORMAL + try: + subprocess.check_call(args) + print GREEN + "started" + NORMAL + except subprocess.CalledProcessError as e: + print ( + RED + + "error starting (exit code: %d); see above for logs" % e.returncode + + NORMAL + ) def stop(): diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 8a75c48733..b9983f72a2 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -26,6 +26,16 @@ class ConfigError(Exception): class Config(object): + stats_reporting_begging_spiel = ( + "We would really appreciate it if you could help our project out by " + "reporting anonymized usage statistics from your homeserver. Only very " + "basic aggregate data (e.g. number of users) will be reported, but it " + "helps us to track the growth of the Matrix community, and helps us to " + "make Matrix a success, as well as to convince other networks that they " + "should peer with us.\n" + "Thank you." + ) + @staticmethod def parse_size(value): if isinstance(value, int) or isinstance(value, long): @@ -111,11 +121,14 @@ class Config(object): results.append(getattr(cls, name)(self, *args, **kargs)) return results - def generate_config(self, config_dir_path, server_name): + def generate_config(self, config_dir_path, server_name, report_stats=None): default_config = "# vim:ft=yaml\n" default_config += "\n\n".join(dedent(conf) for conf in self.invoke_all( - "default_config", config_dir_path, server_name + "default_config", + config_dir_path=config_dir_path, + server_name=server_name, + report_stats=report_stats, )) config = yaml.load(default_config) @@ -139,6 +152,12 @@ class Config(object): action="store_true", help="Generate a config file for the server name" ) + config_parser.add_argument( + "--report-stats", + action="store", + help="Stuff", + choices=["yes", "no"] + ) config_parser.add_argument( "--generate-keys", action="store_true", @@ -189,6 +208,11 @@ class Config(object): config_files.append(config_path) if config_args.generate_config: + if config_args.report_stats is None: + config_parser.error( + "Please specify either --report-stats=yes or --report-stats=no\n\n" + + cls.stats_reporting_begging_spiel + ) if not config_files: config_parser.error( "Must supply a config file.\nA config file can be automatically" @@ -211,7 +235,9 @@ class Config(object): os.makedirs(config_dir_path) with open(config_path, "wb") as config_file: config_bytes, config = obj.generate_config( - config_dir_path, server_name + config_dir_path=config_dir_path, + server_name=server_name, + report_stats=(config_args.report_stats == "yes"), ) obj.invoke_all("generate_files", config) config_file.write(config_bytes) @@ -261,9 +287,20 @@ class Config(object): specified_config.update(yaml_config) server_name = specified_config["server_name"] - _, config = obj.generate_config(config_dir_path, server_name) + _, config = obj.generate_config( + config_dir_path=config_dir_path, + server_name=server_name + ) config.pop("log_config") config.update(specified_config) + if "report_stats" not in config: + sys.stderr.write( + "Please opt in or out of reporting anonymized 
homeserver usage " + "statistics, by setting the report_stats key in your config file " + " ( " + config_path + " ) " + + "to either True or False.\n\n" + + Config.stats_reporting_begging_spiel + "\n") + sys.exit(1) if generate_keys: obj.invoke_all("generate_files", config) diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py index 38f41933b7..b8d301995e 100644 --- a/synapse/config/appservice.py +++ b/synapse/config/appservice.py @@ -20,7 +20,7 @@ class AppServiceConfig(Config): def read_config(self, config): self.app_service_config_files = config.get("app_service_config_files", []) - def default_config(cls, config_dir_path, server_name): + def default_config(cls, **kwargs): return """\ # A list of application service config file to use app_service_config_files: [] diff --git a/synapse/config/captcha.py b/synapse/config/captcha.py index 15a132b4e3..dd92fcd0dc 100644 --- a/synapse/config/captcha.py +++ b/synapse/config/captcha.py @@ -24,7 +24,7 @@ class CaptchaConfig(Config): self.captcha_bypass_secret = config.get("captcha_bypass_secret") self.recaptcha_siteverify_api = config["recaptcha_siteverify_api"] - def default_config(self, config_dir_path, server_name): + def default_config(self, **kwargs): return """\ ## Captcha ## diff --git a/synapse/config/database.py b/synapse/config/database.py index f0611e8884..baeda8f300 100644 --- a/synapse/config/database.py +++ b/synapse/config/database.py @@ -45,7 +45,7 @@ class DatabaseConfig(Config): self.set_databasepath(config.get("database_path")) - def default_config(self, config, config_dir_path): + def default_config(self, **kwargs): database_path = self.abspath("homeserver.db") return """\ # Database configuration diff --git a/synapse/config/key.py b/synapse/config/key.py index 23ac8a3fca..2c187065e5 100644 --- a/synapse/config/key.py +++ b/synapse/config/key.py @@ -40,7 +40,7 @@ class KeyConfig(Config): config["perspectives"] ) - def default_config(self, config_dir_path, server_name): + def default_config(self, config_dir_path, server_name, **kwargs): base_key_name = os.path.join(config_dir_path, server_name) return """\ ## Signing Keys ## diff --git a/synapse/config/logger.py b/synapse/config/logger.py index daca698d0c..bd0c17c861 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -70,7 +70,7 @@ class LoggingConfig(Config): self.log_config = self.abspath(config.get("log_config")) self.log_file = self.abspath(config.get("log_file")) - def default_config(self, config_dir_path, server_name): + def default_config(self, config_dir_path, server_name, **kwargs): log_file = self.abspath("homeserver.log") log_config = self.abspath( os.path.join(config_dir_path, server_name + ".log.config") diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index ae5a691527..825fec9a38 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -19,13 +19,15 @@ from ._base import Config class MetricsConfig(Config): def read_config(self, config): self.enable_metrics = config["enable_metrics"] + self.report_stats = config.get("report_stats", None) self.metrics_port = config.get("metrics_port") self.metrics_bind_host = config.get("metrics_bind_host", "127.0.0.1") - def default_config(self, config_dir_path, server_name): - return """\ + def default_config(self, report_stats=None, **kwargs): + suffix = "" if report_stats is None else "report_stats: %(report_stats)s\n" + return ("""\ ## Metrics ### # Enable collection and rendering of performance metrics enable_metrics: False - """ + """ + suffix) % 
locals() diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 76d9970e5b..611b598ec7 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -27,7 +27,7 @@ class RatelimitConfig(Config): self.federation_rc_reject_limit = config["federation_rc_reject_limit"] self.federation_rc_concurrent = config["federation_rc_concurrent"] - def default_config(self, config_dir_path, server_name): + def default_config(self, **kwargs): return """\ ## Ratelimiting ## diff --git a/synapse/config/registration.py b/synapse/config/registration.py index 62de4b399f..fa98eced34 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -34,7 +34,7 @@ class RegistrationConfig(Config): self.registration_shared_secret = config.get("registration_shared_secret") self.macaroon_secret_key = config.get("macaroon_secret_key") - def default_config(self, config_dir, server_name): + def default_config(self, **kwargs): registration_shared_secret = random_string_with_symbols(50) macaroon_secret_key = random_string_with_symbols(50) return """\ diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 64644b9a7a..2fcf872449 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -60,7 +60,7 @@ class ContentRepositoryConfig(Config): config["thumbnail_sizes"] ) - def default_config(self, config_dir_path, server_name): + def default_config(self, **kwargs): media_store = self.default_path("media_store") uploads_path = self.default_path("uploads") return """ diff --git a/synapse/config/saml2.py b/synapse/config/saml2.py index 1532036876..4c6133cf22 100644 --- a/synapse/config/saml2.py +++ b/synapse/config/saml2.py @@ -41,7 +41,7 @@ class SAML2Config(Config): self.saml2_config_path = None self.saml2_idp_redirect_url = None - def default_config(self, config_dir_path, server_name): + def default_config(self, config_dir_path, server_name, **kwargs): return """ # Enable SAML2 for registration and login. 
Uses pysaml2 # config_path: Path to the sp_conf.py configuration file diff --git a/synapse/config/server.py b/synapse/config/server.py index a03e55c223..4d12d49857 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -117,7 +117,7 @@ class ServerConfig(Config): self.content_addr = content_addr - def default_config(self, config_dir_path, server_name): + def default_config(self, server_name, **kwargs): if ":" in server_name: bind_port = int(server_name.split(":")[1]) unsecure_port = bind_port - 400 diff --git a/synapse/config/tls.py b/synapse/config/tls.py index e6023a718d..0ac2698293 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -50,7 +50,7 @@ class TlsConfig(Config): "use_insecure_ssl_client_just_for_testing_do_not_use" ) - def default_config(self, config_dir_path, server_name): + def default_config(self, config_dir_path, server_name, **kwargs): base_key_name = os.path.join(config_dir_path, server_name) tls_certificate_path = base_key_name + ".tls.crt" diff --git a/synapse/config/voip.py b/synapse/config/voip.py index a1707223d3..a093354ccd 100644 --- a/synapse/config/voip.py +++ b/synapse/config/voip.py @@ -22,7 +22,7 @@ class VoipConfig(Config): self.turn_shared_secret = config["turn_shared_secret"] self.turn_user_lifetime = self.parse_duration(config["turn_user_lifetime"]) - def default_config(self, config_dir_path, server_name): + def default_config(self, **kwargs): return """\ ## Turn ## diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 77cb1dbd81..b64c90d631 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -54,7 +54,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 23 +SCHEMA_VERSION = 24 dir_path = os.path.abspath(os.path.dirname(__file__)) @@ -126,6 +126,24 @@ class DataStore(RoomMemberStore, RoomStore, lock=False, ) + @defer.inlineCallbacks + def count_daily_users(self): + def _count_users(txn): + txn.execute( + "SELECT COUNT(DISTINCT user_id) AS users" + " FROM user_ips" + " WHERE last_seen > ?", + # This is close enough to a day for our purposes. + (int(self._clock.time_msec()) - (1000 * 60 * 60 * 24),) + ) + rows = self.cursor_to_dict(txn) + if rows: + return rows[0]["users"] + return 0 + + ret = yield self.runInteraction("count_users", _count_users) + defer.returnValue(ret) + def get_user_ip_and_agents(self, user): return self._simple_select_list( table="user_ips", diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 0a477e3122..2b51db9940 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- from _base import SQLBaseStore, _RollbackButIsFineException from twisted.internet import defer, reactor @@ -28,6 +27,7 @@ from canonicaljson import encode_canonical_json from contextlib import contextmanager import logging +import math import ujson as json logger = logging.getLogger(__name__) @@ -905,3 +905,59 @@ class EventsStore(SQLBaseStore): txn.execute(sql, (event.event_id,)) result = txn.fetchone() return result[0] if result else None + + @defer.inlineCallbacks + def count_daily_messages(self): + def _count_messages(txn): + now = self.hs.get_clock().time() + + txn.execute( + "SELECT reported_stream_token, reported_time FROM stats_reporting" + ) + last_reported = self.cursor_to_dict(txn) + + txn.execute( + "SELECT stream_ordering" + " FROM events" + " ORDER BY stream_ordering DESC" + " LIMIT 1" + ) + now_reporting = self.cursor_to_dict(txn) + if not now_reporting: + return None + now_reporting = now_reporting[0]["stream_ordering"] + + txn.execute("DELETE FROM stats_reporting") + txn.execute( + "INSERT INTO stats_reporting" + " (reported_stream_token, reported_time)" + " VALUES (?, ?)", + (now_reporting, now,) + ) + + if not last_reported: + return None + + # Close enough to correct for our purposes. + yesterday = (now - 24 * 60 * 60) + if math.fabs(yesterday - last_reported[0]["reported_time"]) > 60 * 60: + return None + + txn.execute( + "SELECT COUNT(*) as messages" + " FROM events NATURAL JOIN event_json" + " WHERE json like '%m.room.message%'" + " AND stream_ordering > ?" + " AND stream_ordering <= ?", + ( + last_reported[0]["reported_stream_token"], + now_reporting, + ) + ) + rows = self.cursor_to_dict(txn) + if not rows: + return None + return rows[0]["messages"] + + ret = yield self.runInteraction("count_messages", _count_messages) + defer.returnValue(ret) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index c9ceb132ae..6d76237658 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -289,3 +289,15 @@ class RegistrationStore(SQLBaseStore): if ret: defer.returnValue(ret['user_id']) defer.returnValue(None) + + @defer.inlineCallbacks + def count_all_users(self): + def _count_users(txn): + txn.execute("SELECT COUNT(*) AS users FROM users") + rows = self.cursor_to_dict(txn) + if rows: + return rows[0]["users"] + return 0 + + ret = yield self.runInteraction("count_users", _count_users) + defer.returnValue(ret) diff --git a/synapse/storage/schema/delta/24/stats_reporting.sql b/synapse/storage/schema/delta/24/stats_reporting.sql new file mode 100644 index 0000000000..e9165d2917 --- /dev/null +++ b/synapse/storage/schema/delta/24/stats_reporting.sql @@ -0,0 +1,22 @@ +/* Copyright 2015 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +-- Should only ever contain one row +CREATE TABLE IF NOT EXISTS stats_reporting( + -- The stream ordering token which was most recently reported as stats + reported_stream_token INTEGER, + -- The time (seconds since epoch) stats were most recently reported + reported_time BIGINT +); diff --git a/tests/storage/event_injector.py b/tests/storage/event_injector.py new file mode 100644 index 0000000000..42bd8928bd --- /dev/null +++ b/tests/storage/event_injector.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from tests import unittest +from twisted.internet import defer + +from synapse.api.constants import EventTypes, Membership +from synapse.types import UserID, RoomID + +from tests.utils import setup_test_homeserver + +from mock import Mock + + +class EventInjector: + def __init__(self, hs): + self.hs = hs + self.store = hs.get_datastore() + self.message_handler = hs.get_handlers().message_handler + self.event_builder_factory = hs.get_event_builder_factory() + + @defer.inlineCallbacks + def create_room(self, room): + builder = self.event_builder_factory.new({ + "type": EventTypes.Create, + "room_id": room.to_string(), + "content": {}, + }) + + event, context = yield self.message_handler._create_new_client_event( + builder + ) + + yield self.store.persist_event(event, context) + + @defer.inlineCallbacks + def inject_room_member(self, room, user, membership): + builder = self.event_builder_factory.new({ + "type": EventTypes.Member, + "sender": user.to_string(), + "state_key": user.to_string(), + "room_id": room.to_string(), + "content": {"membership": membership}, + }) + + event, context = yield self.message_handler._create_new_client_event( + builder + ) + + yield self.store.persist_event(event, context) + + defer.returnValue(event) + + @defer.inlineCallbacks + def inject_message(self, room, user, body): + builder = self.event_builder_factory.new({ + "type": EventTypes.Message, + "sender": user.to_string(), + "state_key": user.to_string(), + "room_id": room.to_string(), + "content": {"body": body, "msgtype": u"message"}, + }) + + event, context = yield self.message_handler._create_new_client_event( + builder + ) + + yield self.store.persist_event(event, context) diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py new file mode 100644 index 0000000000..313013009e --- /dev/null +++ b/tests/storage/test_events.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import uuid +from mock.mock import Mock +from synapse.types import RoomID, UserID + +from tests import unittest +from twisted.internet import defer +from tests.storage.event_injector import EventInjector + +from tests.utils import setup_test_homeserver + + +class EventsStoreTestCase(unittest.TestCase): + + @defer.inlineCallbacks + def setUp(self): + self.hs = yield setup_test_homeserver( + resource_for_federation=Mock(), + http_client=None, + ) + self.store = self.hs.get_datastore() + self.db_pool = self.hs.get_db_pool() + self.message_handler = self.hs.get_handlers().message_handler + self.event_injector = EventInjector(self.hs) + + @defer.inlineCallbacks + def test_count_daily_messages(self): + self.db_pool.runQuery("DELETE FROM stats_reporting") + + self.hs.clock.now = 100 + + # Never reported before, and nothing which could be reported + count = yield self.store.count_daily_messages() + self.assertIsNone(count) + count = yield self.db_pool.runQuery("SELECT COUNT(*) FROM stats_reporting") + self.assertEqual([(0,)], count) + + # Create something to report + room = RoomID.from_string("!abc123:test") + user = UserID.from_string("@raccoonlover:test") + yield self.event_injector.create_room(room) + + self.base_event = yield self._get_last_stream_token() + + yield self.event_injector.inject_message(room, user, "Raccoons are really cute") + + # Never reported before, something could be reported, but isn't because + # it isn't old enough. + count = yield self.store.count_daily_messages() + self.assertIsNone(count) + self._assert_stats_reporting(1, self.hs.clock.now) + + # Already reported yesterday, two new events from today. + yield self.event_injector.inject_message(room, user, "Yeah they are!") + yield self.event_injector.inject_message(room, user, "Incredibly!") + self.hs.clock.now += 60 * 60 * 24 + count = yield self.store.count_daily_messages() + self.assertEqual(2, count) # 2 since yesterday + self._assert_stats_reporting(3, self.hs.clock.now) # 3 ever + + # Last reported too recently. 
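+ # count_daily_messages only reports a count when the previous report is + # roughly 24 hours old (within an hour either way), so advancing the + # clock by just 22 hours stays inside the too-recent window and the + # count remains None.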
+ yield self.event_injector.inject_message(room, user, "Who could disagree?") + self.hs.clock.now += 60 * 60 * 22 + count = yield self.store.count_daily_messages() + self.assertIsNone(count) + self._assert_stats_reporting(4, self.hs.clock.now) + + # Last reported too long ago + yield self.event_injector.inject_message(room, user, "No one.") + self.hs.clock.now += 60 * 60 * 26 + count = yield self.store.count_daily_messages() + self.assertIsNone(count) + self._assert_stats_reporting(5, self.hs.clock.now) + + # And now let's actually report something + yield self.event_injector.inject_message(room, user, "Indeed.") + yield self.event_injector.inject_message(room, user, "Indeed.") + yield self.event_injector.inject_message(room, user, "Indeed.") + # A little over 24 hours is fine :) + self.hs.clock.now += (60 * 60 * 24) + 50 + count = yield self.store.count_daily_messages() + self.assertEqual(3, count) + self._assert_stats_reporting(8, self.hs.clock.now) + + @defer.inlineCallbacks + def _get_last_stream_token(self): + rows = yield self.db_pool.runQuery( + "SELECT stream_ordering" + " FROM events" + " ORDER BY stream_ordering DESC" + " LIMIT 1" + ) + if not rows: + defer.returnValue(0) + else: + defer.returnValue(rows[0][0]) + + @defer.inlineCallbacks + def _assert_stats_reporting(self, messages, time): + rows = yield self.db_pool.runQuery( + "SELECT reported_stream_token, reported_time FROM stats_reporting" + ) + self.assertEqual([(self.base_event + messages, time,)], rows) diff --git a/tests/storage/test_room.py b/tests/storage/test_room.py index ab7625a3ca..caffce64e3 100644 --- a/tests/storage/test_room.py +++ b/tests/storage/test_room.py @@ -85,7 +85,7 @@ class RoomEventsStoreTestCase(unittest.TestCase): # Room events need the full datastore, for persist_event() and # get_room_state() self.store = hs.get_datastore() - self.event_factory = hs.get_event_factory(); + self.event_factory = hs.get_event_factory() self.room = RoomID.from_string("!abcde:test") diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 0c9b89d765..a658a789aa 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -19,6 +19,7 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, Membership from synapse.types import UserID, RoomID +from tests.storage.event_injector import EventInjector from tests.utils import setup_test_homeserver @@ -36,6 +37,7 @@ class StreamStoreTestCase(unittest.TestCase): self.store = hs.get_datastore() self.event_builder_factory = hs.get_event_builder_factory() + self.event_injector = EventInjector(hs) self.handlers = hs.get_handlers() self.message_handler = self.handlers.message_handler @@ -45,60 +47,20 @@ class StreamStoreTestCase(unittest.TestCase): self.room1 = RoomID.from_string("!abc123:test") self.room2 = RoomID.from_string("!xyx987:test") - self.depth = 1 - - @defer.inlineCallbacks - def inject_room_member(self, room, user, membership): - self.depth += 1 - - builder = self.event_builder_factory.new({ - "type": EventTypes.Member, - "sender": user.to_string(), - "state_key": user.to_string(), - "room_id": room.to_string(), - "content": {"membership": membership}, - }) - - event, context = yield self.message_handler._create_new_client_event( - builder - ) - - yield self.store.persist_event(event, context) - - defer.returnValue(event) - - @defer.inlineCallbacks - def inject_message(self, room, user, body): - self.depth += 1 - - builder = self.event_builder_factory.new({ - "type": EventTypes.Message, - "sender": 
user.to_string(), - "state_key": user.to_string(), - "room_id": room.to_string(), - "content": {"body": body, "msgtype": u"message"}, - }) - - event, context = yield self.message_handler._create_new_client_event( - builder - ) - - yield self.store.persist_event(event, context) - @defer.inlineCallbacks def test_event_stream_get_other(self): # Both bob and alice joins the room - yield self.inject_room_member( + yield self.event_injector.inject_room_member( self.room1, self.u_alice, Membership.JOIN ) - yield self.inject_room_member( + yield self.event_injector.inject_room_member( self.room1, self.u_bob, Membership.JOIN ) # Initial stream key: start = yield self.store.get_room_events_max_id() - yield self.inject_message(self.room1, self.u_alice, u"test") + yield self.event_injector.inject_message(self.room1, self.u_alice, u"test") end = yield self.store.get_room_events_max_id() @@ -125,17 +87,17 @@ class StreamStoreTestCase(unittest.TestCase): @defer.inlineCallbacks def test_event_stream_get_own(self): # Both bob and alice joins the room - yield self.inject_room_member( + yield self.event_injector.inject_room_member( self.room1, self.u_alice, Membership.JOIN ) - yield self.inject_room_member( + yield self.event_injector.inject_room_member( self.room1, self.u_bob, Membership.JOIN ) # Initial stream key: start = yield self.store.get_room_events_max_id() - yield self.inject_message(self.room1, self.u_alice, u"test") + yield self.event_injector.inject_message(self.room1, self.u_alice, u"test") end = yield self.store.get_room_events_max_id() @@ -162,22 +124,22 @@ class StreamStoreTestCase(unittest.TestCase): @defer.inlineCallbacks def test_event_stream_join_leave(self): # Both bob and alice joins the room - yield self.inject_room_member( + yield self.event_injector.inject_room_member( self.room1, self.u_alice, Membership.JOIN ) - yield self.inject_room_member( + yield self.event_injector.inject_room_member( self.room1, self.u_bob, Membership.JOIN ) # Then bob leaves again. - yield self.inject_room_member( + yield self.event_injector.inject_room_member( self.room1, self.u_bob, Membership.LEAVE ) # Initial stream key: start = yield self.store.get_room_events_max_id() - yield self.inject_message(self.room1, self.u_alice, u"test") + yield self.event_injector.inject_message(self.room1, self.u_alice, u"test") end = yield self.store.get_room_events_max_id() @@ -193,17 +155,17 @@ class StreamStoreTestCase(unittest.TestCase): @defer.inlineCallbacks def test_event_stream_prev_content(self): - yield self.inject_room_member( + yield self.event_injector.inject_room_member( self.room1, self.u_bob, Membership.JOIN ) - event1 = yield self.inject_room_member( + event1 = yield self.event_injector.inject_room_member( self.room1, self.u_alice, Membership.JOIN ) start = yield self.store.get_room_events_max_id() - event2 = yield self.inject_room_member( + event2 = yield self.event_injector.inject_room_member( self.room1, self.u_alice, Membership.JOIN, ) -- cgit 1.4.1 From c85c9125627a62c73711786723be12be30d7a81e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 9 Oct 2015 15:48:31 +0100 Subject: Add basic full text search impl. 
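This change indexes message bodies, room names, and room topics into an event_search table of Postgres tsvectors and ranks matches with ts_rank_cd. A minimal standalone sketch of the query shape added in synapse/storage/search.py is below; the psycopg2 client and the connection DSN are illustrative assumptions, not part of this change:

import psycopg2

# Connect to the homeserver's Postgres database (DSN is a placeholder).
conn = psycopg2.connect("dbname=synapse")
cur = conn.cursor()

# plainto_tsquery turns plain user input into a tsquery; ts_rank_cd
# scores how well each indexed event vector matches it.
cur.execute(
    "SELECT ts_rank_cd(vector, query) AS rank, event_id"
    " FROM plainto_tsquery('english', %s) AS query, event_search"
    " WHERE vector @@ query"
    " ORDER BY rank DESC",
    ("raccoons",)
)
rows = cur.fetchall()  # [(rank, event_id), ...], best match first

The storage layer issues the same statement through its own database abstraction, with ?-style placeholders.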
--- synapse/api/constants.py | 19 +++++++ synapse/handlers/__init__.py | 2 + synapse/handlers/search.py | 95 ++++++++++++++++++++++++++++++++++ synapse/rest/client/v1/room.py | 17 ++++++ synapse/storage/__init__.py | 2 + synapse/storage/_base.py | 2 +- synapse/storage/schema/delta/24/fts.py | 57 ++++++++++++++++++++ synapse/storage/search.py | 75 +++++++++++++++++++++++++++ 8 files changed, 268 insertions(+), 1 deletion(-) create mode 100644 synapse/handlers/search.py create mode 100644 synapse/storage/schema/delta/24/fts.py create mode 100644 synapse/storage/search.py (limited to 'synapse/storage/schema') diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 008ee64727..7c7f9ff957 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -84,3 +84,22 @@ class RoomCreationPreset(object): PRIVATE_CHAT = "private_chat" PUBLIC_CHAT = "public_chat" TRUSTED_PRIVATE_CHAT = "trusted_private_chat" + + +class SearchConstraintTypes(object): + FTS = "fts" + EXACT = "exact" + PREFIX = "prefix" + SUBSTRING = "substring" + RANGE = "range" + + +class KnownRoomEventKeys(object): + CONTENT_BODY = "content.body" + CONTENT_MSGTYPE = "content.msgtype" + CONTENT_NAME = "content.name" + CONTENT_TOPIC = "content.topic" + + SENDER = "sender" + ORIGIN_SERVER_TS = "origin_server_ts" + ROOM_ID = "room_id" diff --git a/synapse/handlers/__init__.py b/synapse/handlers/__init__.py index 8725c3c420..87b4d381c7 100644 --- a/synapse/handlers/__init__.py +++ b/synapse/handlers/__init__.py @@ -32,6 +32,7 @@ from .sync import SyncHandler from .auth import AuthHandler from .identity import IdentityHandler from .receipts import ReceiptsHandler +from .search import SearchHandler class Handlers(object): @@ -68,3 +69,4 @@ class Handlers(object): self.sync_handler = SyncHandler(hs) self.auth_handler = AuthHandler(hs) self.identity_handler = IdentityHandler(hs) + self.search_handler = SearchHandler(hs) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py new file mode 100644 index 0000000000..8b997fc394 --- /dev/null +++ b/synapse/handlers/search.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from twisted.internet import defer + +from ._base import BaseHandler + +from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes +from synapse.api.errors import SynapseError +from synapse.events.utils import serialize_event + +import logging + + +logger = logging.getLogger(__name__) + + +KEYS_TO_ALLOWED_CONSTRAINT_TYPES = { + KnownRoomEventKeys.CONTENT_BODY: [SearchConstraintTypes.FTS], + KnownRoomEventKeys.CONTENT_MSGTYPE: [SearchConstraintTypes.EXACT], + KnownRoomEventKeys.CONTENT_NAME: [SearchConstraintTypes.FTS, SearchConstraintTypes.EXACT, SearchConstraintTypes.SUBSTRING], + KnownRoomEventKeys.CONTENT_TOPIC: [SearchConstraintTypes.FTS], + KnownRoomEventKeys.SENDER: [SearchConstraintTypes.EXACT], + KnownRoomEventKeys.ORIGIN_SERVER_TS: [SearchConstraintTypes.RANGE], + KnownRoomEventKeys.ROOM_ID: [SearchConstraintTypes.EXACT], +} + + +class RoomConstraint(object): + def __init__(self, search_type, keys, value): + self.search_type = search_type + self.keys = keys + self.value = value + + @classmethod + def from_dict(cls, d): + search_type = d["type"] + keys = d["keys"] + + for key in keys: + if key not in KEYS_TO_ALLOWED_CONSTRAINT_TYPES: + raise SynapseError(400, "Unrecognized key %r", key) + + if search_type not in KEYS_TO_ALLOWED_CONSTRAINT_TYPES[key]: + raise SynapseError(400, "Disallowed constraint type %r for key %r", search_type, key) + + return cls(search_type, keys, d["value"]) + + +class SearchHandler(BaseHandler): + + def __init__(self, hs): + super(SearchHandler, self).__init__(hs) + + @defer.inlineCallbacks + def search(self, content): + constraint_dicts = content["search_categories"]["room_events"]["constraints"] + constraints = [RoomConstraint.from_dict(c)for c in constraint_dicts] + + fts = False + for c in constraints: + if c.search_type == SearchConstraintTypes.FTS: + if fts: + raise SynapseError(400, "Only one constraint can be FTS") + fts = True + + res = yield self.hs.get_datastore().search_msgs(constraints) + + time_now = self.hs.get_clock().time_msec() + + results = [ + { + "rank": r["rank"], + "result": serialize_event(r["result"], time_now) + } + for r in res + ] + + logger.info("returning: %r", results) + + results.sort(key=lambda r: -r["rank"]) + + defer.returnValue(results) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 23871f161e..35bd702a43 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -529,6 +529,22 @@ class RoomTypingRestServlet(ClientV1RestServlet): defer.returnValue((200, {})) +class SearchRestServlet(ClientV1RestServlet): + PATTERN = client_path_pattern( + "/search$" + ) + + @defer.inlineCallbacks + def on_POST(self, request): + auth_user, _ = yield self.auth.get_user_by_req(request) + + content = _parse_json(request) + + results = yield self.handlers.search_handler.search(content) + + defer.returnValue((200, results)) + + def _parse_json(request): try: content = json.loads(request.content.read()) @@ -585,3 +601,4 @@ def register_servlets(hs, http_server): RoomInitialSyncRestServlet(hs).register(http_server) RoomRedactEventRestServlet(hs).register(http_server) RoomTypingRestServlet(hs).register(http_server) + SearchRestServlet(hs).register(http_server) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 340e59afcb..5f91ef77c0 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -40,6 +40,7 @@ from .filtering import FilteringStore from .end_to_end_keys import EndToEndKeyStore from .receipts import ReceiptsStore 
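+# SearchStore is mixed into the DataStore class defined below, so the +# full-text search queries share the same connection pool and transaction +# helpers as the rest of the storage layer.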
+from .search import SearchStore import fnmatch @@ -79,6 +80,7 @@ class DataStore(RoomMemberStore, RoomStore, EventsStore, ReceiptsStore, EndToEndKeyStore, + SearchStore, ): def __init__(self, hs): diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 693784ad38..218e708054 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -519,7 +519,7 @@ class SQLBaseStore(object): allow_none=False, desc="_simple_select_one_onecol"): """Executes a SELECT query on the named table, which is expected to - return a single row, returning a single column from it." + return a single row, returning a single column from it. Args: table : string giving the table name diff --git a/synapse/storage/schema/delta/24/fts.py b/synapse/storage/schema/delta/24/fts.py new file mode 100644 index 0000000000..5680332758 --- /dev/null +++ b/synapse/storage/schema/delta/24/fts.py @@ -0,0 +1,57 @@ +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from synapse.storage import get_statements +from synapse.storage.engines import PostgresEngine + +logger = logging.getLogger(__name__) + + +POSTGRES_SQL = """ +CREATE TABLE event_search ( + event_id TEXT, + room_id TEXT, + key TEXT, + vector tsvector +); + +INSERT INTO event_search SELECT + event_id, room_id, 'content.body', + to_tsvector('english', json::json->'content'->>'body') + FROM events NATURAL JOIN event_json WHERE type = 'm.room.message'; + +INSERT INTO event_search SELECT + event_id, room_id, 'content.name', + to_tsvector('english', json::json->'content'->>'name') + FROM events NATURAL JOIN event_json WHERE type = 'm.room.name'; + +INSERT INTO event_search SELECT + event_id, room_id, 'content.topic', + to_tsvector('english', json::json->'content'->>'topic') + FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; + + +CREATE INDEX event_search_idx ON event_search USING gin(vector); +""" + + +def run_upgrade(cur, database_engine, *args, **kwargs): + if not isinstance(database_engine, PostgresEngine): + # We only support FTS for postgres currently. + return + + for statement in get_statements(POSTGRES_SQL.splitlines()): + cur.execute(statement) diff --git a/synapse/storage/search.py b/synapse/storage/search.py new file mode 100644 index 0000000000..eea4477765 --- /dev/null +++ b/synapse/storage/search.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from twisted.internet import defer + +from _base import SQLBaseStore +from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes + + +class SearchStore(SQLBaseStore): + @defer.inlineCallbacks + def search_msgs(self, constraints): + clauses = [] + args = [] + fts = None + + for c in constraints: + local_clauses = [] + if c.search_type == SearchConstraintTypes.FTS: + fts = c.value + for key in c.keys: + local_clauses.append("key = ?") + args.append(key) + elif c.search_type == SearchConstraintTypes.EXACT: + for key in c.keys: + if key == KnownRoomEventKeys.ROOM_ID: + for value in c.value: + local_clauses.append("room_id = ?") + args.append(value) + clauses.append( + "(%s)" % (" OR ".join(local_clauses),) + ) + + sql = ( + "SELECT ts_rank_cd(vector, query) AS rank, event_id" + " FROM plainto_tsquery('english', ?) as query, event_search" + " WHERE vector @@ query" + ) + + for clause in clauses: + sql += " AND " + clause + + sql += " ORDER BY rank DESC" + + results = yield self._execute( + "search_msgs", self.cursor_to_dict, sql, *([fts] + args) + ) + + events = yield self._get_events([r["event_id"] for r in results]) + + event_map = { + ev.event_id: ev + for ev in events + } + + defer.returnValue([ + { + "rank": r["rank"], + "result": event_map[r["event_id"]] + } + for r in results + if r["event_id"] in event_map + ]) -- cgit 1.4.1 From 61561b9df791ec90e287e535cc75831c2016bf36 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Oct 2015 10:49:53 +0100 Subject: Keep FTS indexes up to date. Only search through rooms currently joined --- synapse/handlers/search.py | 31 ++++++++++++++++++++++--------- synapse/rest/client/v1/room.py | 2 +- synapse/storage/events.py | 2 ++ synapse/storage/room.py | 22 ++++++++++++++++++++++ synapse/storage/schema/delta/24/fts.py | 3 ++- synapse/storage/search.py | 7 ++++++- 6 files changed, 55 insertions(+), 12 deletions(-) (limited to 'synapse/storage/schema') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 8b997fc394..b6bdb752e9 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -65,7 +65,7 @@ class SearchHandler(BaseHandler): super(SearchHandler, self).__init__(hs) @defer.inlineCallbacks - def search(self, content): + def search(self, user, content): constraint_dicts = content["search_categories"]["room_events"]["constraints"] constraints = [RoomConstraint.from_dict(c)for c in constraint_dicts] @@ -76,20 +76,33 @@ class SearchHandler(BaseHandler): raise SynapseError(400, "Only one constraint can be FTS") fts = True - res = yield self.hs.get_datastore().search_msgs(constraints) + rooms = yield self.store.get_rooms_for_user( + user.to_string(), + ) - time_now = self.hs.get_clock().time_msec() + # For some reason the list of events contains duplicates + # TODO(paul): work out why because I really don't think it should + room_ids = set(r.room_id for r in rooms) - results = [ - { + res = yield self.store.search_msgs(room_ids, constraints) + + time_now = self.clock.time_msec() + + results = { + r["result"].event_id: { "rank": r["rank"], "result": serialize_event(r["result"], time_now) } for r in res - ] + } logger.info("returning: %r", results) - results.sort(key=lambda r: -r["rank"]) - - defer.returnValue(results) + defer.returnValue({ + "search_categories": { + "room_events": { + "results": results, + "count": len(results) + } + } + }) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 35bd702a43..94adabca62 100644 --- a/synapse/rest/client/v1/room.py +++ 
b/synapse/rest/client/v1/room.py @@ -540,7 +540,7 @@ class SearchRestServlet(ClientV1RestServlet): content = _parse_json(request) - results = yield self.handlers.search_handler.search(content) + results = yield self.handlers.search_handler.search(auth_user, content) defer.returnValue((200, results)) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 416ef6af93..e6c1abfc27 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -307,6 +307,8 @@ class EventsStore(SQLBaseStore): self._store_room_name_txn(txn, event) elif event.type == EventTypes.Topic: self._store_room_topic_txn(txn, event) + elif event.type == EventTypes.Message: + self._store_room_message_txn(txn, event) elif event.type == EventTypes.Redaction: self._store_redaction(txn, event) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 5e07b7e0e5..e4e830944a 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -175,6 +175,10 @@ class RoomStore(SQLBaseStore): }, ) + self._store_event_search_txn( + txn, event, "content.topic", event.content["topic"] + ) + def _store_room_name_txn(self, txn, event): if hasattr(event, "content") and "name" in event.content: self._simple_insert_txn( @@ -187,6 +191,24 @@ class RoomStore(SQLBaseStore): } ) + self._store_event_search_txn( + txn, event, "content.name", event.content["name"] + ) + + def _store_room_message_txn(self, txn, event): + if hasattr(event, "content") and "body" in event.content: + self._store_event_search_txn( + txn, event, "content.body", event.content["body"] + ) + + def _store_event_search_txn(self, txn, event, key, value): + sql = ( + "INSERT INTO event_search (event_id, room_id, key, vector)" + " VALUES (?,?,?,to_tsvector('english', ?))" + ) + + txn.execute(sql, (event.event_id, event.room_id, key, value,)) + @cachedInlineCallbacks() def get_room_name_and_aliases(self, room_id): def f(txn): diff --git a/synapse/storage/schema/delta/24/fts.py b/synapse/storage/schema/delta/24/fts.py index 5680332758..05f1605fdd 100644 --- a/synapse/storage/schema/delta/24/fts.py +++ b/synapse/storage/schema/delta/24/fts.py @@ -44,7 +44,8 @@ INSERT INTO event_search SELECT FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; -CREATE INDEX event_search_idx ON event_search USING gin(vector); +CREATE INDEX event_search_fts_idx ON event_search USING gin(vector); +CREATE INDEX event_search_ev_idx ON event_search(event_id); """ diff --git a/synapse/storage/search.py b/synapse/storage/search.py index eea4477765..e66b5f9edc 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -21,11 +21,16 @@ from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes class SearchStore(SQLBaseStore): @defer.inlineCallbacks - def search_msgs(self, constraints): + def search_msgs(self, room_ids, constraints): clauses = [] args = [] fts = None + clauses.append( + "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),) + ) + args.extend(room_ids) + for c in constraints: local_clauses = [] if c.search_type == SearchConstraintTypes.FTS: -- cgit 1.4.1 From 1a40afa75693f0c2ae3b2eaac62ff9ca6bb02488 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Oct 2015 10:36:25 +0100 Subject: Add sqlite schema --- synapse/storage/schema/delta/24/fts.py | 69 +++++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 5 deletions(-) (limited to 'synapse/storage/schema') diff --git a/synapse/storage/schema/delta/24/fts.py b/synapse/storage/schema/delta/24/fts.py index 05f1605fdd..a806f4b8d3 100644 --- 
a/synapse/storage/schema/delta/24/fts.py +++ b/synapse/storage/schema/delta/24/fts.py @@ -15,7 +15,9 @@ import logging from synapse.storage import get_statements -from synapse.storage.engines import PostgresEngine +from synapse.storage.engines import PostgresEngine, Sqlite3Engine + +import ujson logger = logging.getLogger(__name__) @@ -46,13 +48,70 @@ INSERT INTO event_search SELECT CREATE INDEX event_search_fts_idx ON event_search USING gin(vector); CREATE INDEX event_search_ev_idx ON event_search(event_id); +CREATE INDEX event_search_ev_ridx ON event_search(room_id); """ +SQLITE_TABLE = ( + "CREATE VIRTUAL TABLE event_search USING fts3 ( event_id, room_id, key, value)" +) +SQLITE_INDEX = "CREATE INDEX event_search_ev_idx ON event_search(event_id)" + + def run_upgrade(cur, database_engine, *args, **kwargs): - if not isinstance(database_engine, PostgresEngine): - # We only support FTS for postgres currently. + if isinstance(database_engine, PostgresEngine): + for statement in get_statements(POSTGRES_SQL.splitlines()): + cur.execute(statement) return - for statement in get_statements(POSTGRES_SQL.splitlines()): - cur.execute(statement) + if isinstance(database_engine, Sqlite3Engine): + cur.execute(SQLITE_TABLE) + + rowid = -1 + while True: + cur.execute( + "SELECT rowid, json FROM event_json" + " WHERE rowid > ?" + " ORDER BY rowid ASC LIMIT 100", + (rowid,) + ) + + res = cur.fetchall() + + if not res: + break + + events = [ + ujson.loads(js) + for _, js in res + ] + + rowid = max(rid for rid, _ in res) + + rows = [] + for ev in events: + if ev["type"] == "m.room.message": + rows.append(( + ev["event_id"], ev["room_id"], "content.body", + ev["content"]["body"] + )) + if ev["type"] == "m.room.name": + rows.append(( + ev["event_id"], ev["room_id"], "content.name", + ev["content"]["name"] + )) + if ev["type"] == "m.room.topic": + rows.append(( + ev["event_id"], ev["room_id"], "content.topic", + ev["content"]["topic"] + )) + + if rows: + logger.info(rows) + cur.executemany( + "INSERT INTO event_search (event_id, room_id, key, value)" + " VALUES (?,?,?,?)", + rows + ) + + # cur.execute(SQLITE_INDEX) -- cgit 1.4.1 From 22a8c91448f710c20a6aee66ec2a452528f1d637 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Oct 2015 11:19:44 +0100 Subject: Split up run_upgrade --- synapse/storage/schema/delta/24/fts.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'synapse/storage/schema') diff --git a/synapse/storage/schema/delta/24/fts.py b/synapse/storage/schema/delta/24/fts.py index b45a5fd820..0c752d8426 100644 --- a/synapse/storage/schema/delta/24/fts.py +++ b/synapse/storage/schema/delta/24/fts.py @@ -55,16 +55,24 @@ CREATE INDEX event_search_ev_ridx ON event_search(room_id); SQLITE_TABLE = ( "CREATE VIRTUAL TABLE event_search USING fts3 ( event_id, room_id, key, value)" ) -SQLITE_INDEX = "CREATE INDEX event_search_ev_idx ON event_search(event_id)" def run_upgrade(cur, database_engine, *args, **kwargs): if isinstance(database_engine, PostgresEngine): - for statement in get_statements(POSTGRES_SQL.splitlines()): - cur.execute(statement) + run_postgres_upgrade(cur) return if isinstance(database_engine, Sqlite3Engine): + run_sqlite_upgrade(cur) + return + + +def run_postgres_upgrade(cur): + for statement in get_statements(POSTGRES_SQL.splitlines()): + cur.execute(statement) + + +def run_sqlite_upgrade(cur): cur.execute(SQLITE_TABLE) rowid = -1 @@ -113,5 +121,3 @@ def run_upgrade(cur, database_engine, *args, **kwargs): " VALUES (?,?,?,?)", rows ) - - # 
cur.execute(SQLITE_INDEX) -- cgit 1.4.1 From 4d25bc6c92c3d219786892c5d510e0c4c4eb8b96 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 11:12:28 +0100 Subject: Move FTS to delta 25 --- synapse/storage/prepare_database.py | 2 +- synapse/storage/schema/delta/24/fts.py | 123 --------------------------------- synapse/storage/schema/delta/25/fts.py | 123 +++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 124 deletions(-) delete mode 100644 synapse/storage/schema/delta/24/fts.py create mode 100644 synapse/storage/schema/delta/25/fts.py (limited to 'synapse/storage/schema') diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 1ddf55be4d..1a74d6e360 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 24 +SCHEMA_VERSION = 25 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/24/fts.py b/synapse/storage/schema/delta/24/fts.py deleted file mode 100644 index 0c752d8426..0000000000 --- a/synapse/storage/schema/delta/24/fts.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright 2015 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging - -from synapse.storage.prepare_database import get_statements -from synapse.storage.engines import PostgresEngine, Sqlite3Engine - -import ujson - -logger = logging.getLogger(__name__) - - -POSTGRES_SQL = """ -CREATE TABLE event_search ( - event_id TEXT, - room_id TEXT, - key TEXT, - vector tsvector -); - -INSERT INTO event_search SELECT - event_id, room_id, 'content.body', - to_tsvector('english', json::json->'content'->>'body') - FROM events NATURAL JOIN event_json WHERE type = 'm.room.message'; - -INSERT INTO event_search SELECT - event_id, room_id, 'content.name', - to_tsvector('english', json::json->'content'->>'name') - FROM events NATURAL JOIN event_json WHERE type = 'm.room.name'; - -INSERT INTO event_search SELECT - event_id, room_id, 'content.topic', - to_tsvector('english', json::json->'content'->>'topic') - FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; - - -CREATE INDEX event_search_fts_idx ON event_search USING gin(vector); -CREATE INDEX event_search_ev_idx ON event_search(event_id); -CREATE INDEX event_search_ev_ridx ON event_search(room_id); -""" - - -SQLITE_TABLE = ( - "CREATE VIRTUAL TABLE event_search USING fts3 ( event_id, room_id, key, value)" -) - - -def run_upgrade(cur, database_engine, *args, **kwargs): - if isinstance(database_engine, PostgresEngine): - run_postgres_upgrade(cur) - return - - if isinstance(database_engine, Sqlite3Engine): - run_sqlite_upgrade(cur) - return - - -def run_postgres_upgrade(cur): - for statement in get_statements(POSTGRES_SQL.splitlines()): - cur.execute(statement) - - -def run_sqlite_upgrade(cur): - cur.execute(SQLITE_TABLE) - - rowid = -1 - while True: - cur.execute( - "SELECT rowid, json FROM event_json" - " WHERE rowid > ?" - " ORDER BY rowid ASC LIMIT 100", - (rowid,) - ) - - res = cur.fetchall() - - if not res: - break - - events = [ - ujson.loads(js) - for _, js in res - ] - - rowid = max(rid for rid, _ in res) - - rows = [] - for ev in events: - if ev["type"] == "m.room.message": - rows.append(( - ev["event_id"], ev["room_id"], "content.body", - ev["content"]["body"] - )) - if ev["type"] == "m.room.name": - rows.append(( - ev["event_id"], ev["room_id"], "content.name", - ev["content"]["name"] - )) - if ev["type"] == "m.room.topic": - rows.append(( - ev["event_id"], ev["room_id"], "content.topic", - ev["content"]["topic"] - )) - - if rows: - logger.info(rows) - cur.executemany( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)", - rows - ) diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py new file mode 100644 index 0000000000..fd181fc630 --- /dev/null +++ b/synapse/storage/schema/delta/25/fts.py @@ -0,0 +1,123 @@ +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging + +from synapse.storage.prepare_database import get_statements +from synapse.storage.engines import PostgresEngine, Sqlite3Engine + +import ujson + +logger = logging.getLogger(__name__) + + +POSTGRES_SQL = """ +CREATE TABLE IF NOT EXISTS event_search ( + event_id TEXT, + room_id TEXT, + key TEXT, + vector tsvector +); + +INSERT INTO event_search SELECT + event_id, room_id, 'content.body', + to_tsvector('english', json::json->'content'->>'body') + FROM events NATURAL JOIN event_json WHERE type = 'm.room.message'; + +INSERT INTO event_search SELECT + event_id, room_id, 'content.name', + to_tsvector('english', json::json->'content'->>'name') + FROM events NATURAL JOIN event_json WHERE type = 'm.room.name'; + +INSERT INTO event_search SELECT + event_id, room_id, 'content.topic', + to_tsvector('english', json::json->'content'->>'topic') + FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; + + +CREATE INDEX event_search_fts_idx ON event_search USING gin(vector); +CREATE INDEX event_search_ev_idx ON event_search(event_id); +CREATE INDEX event_search_ev_ridx ON event_search(room_id); +""" + + +SQLITE_TABLE = ( + "CREATE VIRTUAL TABLE IF NOT EXISTS event_search USING fts3 ( event_id, room_id, key, value)" +) + + +def run_upgrade(cur, database_engine, *args, **kwargs): + if isinstance(database_engine, PostgresEngine): + run_postgres_upgrade(cur) + return + + if isinstance(database_engine, Sqlite3Engine): + run_sqlite_upgrade(cur) + return + + +def run_postgres_upgrade(cur): + for statement in get_statements(POSTGRES_SQL.splitlines()): + cur.execute(statement) + + +def run_sqlite_upgrade(cur): + cur.execute(SQLITE_TABLE) + + rowid = -1 + while True: + cur.execute( + "SELECT rowid, json FROM event_json" + " WHERE rowid > ?" + " ORDER BY rowid ASC LIMIT 100", + (rowid,) + ) + + res = cur.fetchall() + + if not res: + break + + events = [ + ujson.loads(js) + for _, js in res + ] + + rowid = max(rid for rid, _ in res) + + rows = [] + for ev in events: + if ev["type"] == "m.room.message": + rows.append(( + ev["event_id"], ev["room_id"], "content.body", + ev["content"]["body"] + )) + if ev["type"] == "m.room.name": + rows.append(( + ev["event_id"], ev["room_id"], "content.name", + ev["content"]["name"] + )) + if ev["type"] == "m.room.topic": + rows.append(( + ev["event_id"], ev["room_id"], "content.topic", + ev["content"]["topic"] + )) + + if rows: + logger.info(rows) + cur.executemany( + "INSERT INTO event_search (event_id, room_id, key, value)" + " VALUES (?,?,?,?)", + rows + ) -- cgit 1.4.1 From f142898f529f89c5bd90710db2d0771894b0b33f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 11:18:01 +0100 Subject: PEP8 --- synapse/storage/schema/delta/25/fts.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse/storage/schema') diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py index fd181fc630..ed3cc06557 100644 --- a/synapse/storage/schema/delta/25/fts.py +++ b/synapse/storage/schema/delta/25/fts.py @@ -53,7 +53,8 @@ CREATE INDEX event_search_ev_ridx ON event_search(room_id); SQLITE_TABLE = ( - "CREATE VIRTUAL TABLE IF NOT EXISTS event_search USING fts3 ( event_id, room_id, key, value)" + "CREATE VIRTUAL TABLE IF NOT EXISTS event_search" + " USING fts3 ( event_id, room_id, key, value)" ) -- cgit 1.4.1 From 0c36098c1fe561629651e446589a644fed9188e4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Oct 2015 13:23:48 +0100 Subject: Implement rank function for SQLite FTS --- 
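
[Editor's note] The commit that follows leans on two SQLite features: fts4's matchinfo() returns match statistics as a packed blob of unsigned 32-bit integers, and sqlite3.Connection.create_function exposes a Python callable to SQL so a score can be computed per row. A self-contained sketch on a toy table with a deliberately simplistic score, assuming an FTS4-enabled SQLite build:

    import sqlite3
    import struct

    def simple_rank(raw_match_info):
        # matchinfo's default 'pcx' output is a sequence of uint32 counters.
        counts = struct.unpack(
            "@%dI" % (len(raw_match_info) // 4), raw_match_info
        )
        return float(sum(counts))  # toy score: just sum the counters

    conn = sqlite3.connect(":memory:")
    conn.create_function("rank", 1, simple_rank)
    conn.execute("CREATE VIRTUAL TABLE docs USING fts4(body)")
    conn.execute("INSERT INTO docs (body) VALUES ('hello matrix world')")
    rows = conn.execute(
        "SELECT rank(matchinfo(docs)) AS score, body FROM docs"
        " WHERE body MATCH ? ORDER BY score DESC",
        ("matrix",),
    ).fetchall()

The real _rank below implements the weighted example from the FTS3 documentation instead of this toy sum, but the wiring is the same.
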
synapse/storage/engines/sqlite3.py | 27 +++++++++++++++++++++++++++ synapse/storage/schema/delta/25/fts.py | 2 +- synapse/storage/search.py | 3 ++- 3 files changed, 30 insertions(+), 2 deletions(-) (limited to 'synapse/storage/schema') diff --git a/synapse/storage/engines/sqlite3.py b/synapse/storage/engines/sqlite3.py index bad3b5c5ac..a5a54ec011 100644 --- a/synapse/storage/engines/sqlite3.py +++ b/synapse/storage/engines/sqlite3.py @@ -17,6 +17,8 @@ from synapse.storage.prepare_database import ( prepare_database, prepare_sqlite3_database ) +import struct + class Sqlite3Engine(object): single_threaded = True @@ -32,6 +34,7 @@ class Sqlite3Engine(object): def on_new_connection(self, db_conn): self.prepare_database(db_conn) + db_conn.create_function("rank", 1, _rank) def prepare_database(self, db_conn): prepare_sqlite3_database(db_conn) @@ -45,3 +48,27 @@ class Sqlite3Engine(object): def lock_table(self, txn, table): return + + +# Following functions taken from: https://github.com/coleifer/peewee + +def _parse_match_info(buf): + bufsize = len(buf) + return [struct.unpack('@I', buf[i:i+4])[0] for i in range(0, bufsize, 4)] + + +def _rank(raw_match_info): + """Handle match_info called w/default args 'pcx' - based on the example rank + function http://sqlite.org/fts3.html#appendix_a + """ + match_info = _parse_match_info(raw_match_info) + score = 0.0 + p, c = match_info[:2] + for phrase_num in range(p): + phrase_info_idx = 2 + (phrase_num * c * 3) + for col_num in range(c): + col_idx = phrase_info_idx + (col_num * 3) + x1, x2 = match_info[col_idx:col_idx + 2] + if x1 > 0: + score += float(x1) / x2 + return score diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py index ed3cc06557..3f0b02d119 100644 --- a/synapse/storage/schema/delta/25/fts.py +++ b/synapse/storage/schema/delta/25/fts.py @@ -54,7 +54,7 @@ CREATE INDEX event_search_ev_ridx ON event_search(room_id); SQLITE_TABLE = ( "CREATE VIRTUAL TABLE IF NOT EXISTS event_search" - " USING fts3 ( event_id, room_id, key, value)" + " USING fts4 ( event_id, room_id, key, value )" ) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 9608b5d6a7..cdf003502f 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -72,7 +72,8 @@ class SearchStore(SQLBaseStore): ) elif isinstance(self.database_engine, Sqlite3Engine): sql = ( - "SELECT 0 as rank, room_id, event_id FROM event_search" + "SELECT rank(matchinfo(event_search)) as rank, room_id, event_id" + " FROM event_search" " WHERE value MATCH ?" 
) else: -- cgit 1.4.1 From 4cf633d5e9628a56cf9b400ee3e073fcdcb365f0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Oct 2015 15:41:36 +0100 Subject: Pull out sender when computing search results --- synapse/storage/schema/delta/25/fts.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'synapse/storage/schema') diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py index 3f0b02d119..056487af30 100644 --- a/synapse/storage/schema/delta/25/fts.py +++ b/synapse/storage/schema/delta/25/fts.py @@ -26,22 +26,23 @@ POSTGRES_SQL = """ CREATE TABLE IF NOT EXISTS event_search ( event_id TEXT, room_id TEXT, + sender TEXT, key TEXT, vector tsvector ); INSERT INTO event_search SELECT - event_id, room_id, 'content.body', + event_id, room_id, json::json->>'sender', 'content.body', to_tsvector('english', json::json->'content'->>'body') FROM events NATURAL JOIN event_json WHERE type = 'm.room.message'; INSERT INTO event_search SELECT - event_id, room_id, 'content.name', + event_id, room_id, json::json->>'sender', 'content.name', to_tsvector('english', json::json->'content'->>'name') FROM events NATURAL JOIN event_json WHERE type = 'm.room.name'; INSERT INTO event_search SELECT - event_id, room_id, 'content.topic', + event_id, room_id, json::json->>'sender', 'content.topic', to_tsvector('english', json::json->'content'->>'topic') FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; @@ -99,26 +100,28 @@ def run_sqlite_upgrade(cur): rows = [] for ev in events: - if ev["type"] == "m.room.message": + content = ev.get("content", {}) + body = content.get("body", None) + name = content.get("name", None) + topic = content.get("topic", None) + sender = ev.get("sender", None) + if ev["type"] == "m.room.message" and body: rows.append(( - ev["event_id"], ev["room_id"], "content.body", - ev["content"]["body"] + ev["event_id"], ev["room_id"], sender, "content.body", body )) - if ev["type"] == "m.room.name": + if ev["type"] == "m.room.name" and name: rows.append(( - ev["event_id"], ev["room_id"], "content.name", - ev["content"]["name"] + ev["event_id"], ev["room_id"], sender, "content.name", name )) - if ev["type"] == "m.room.topic": + if ev["type"] == "m.room.topic" and topic: rows.append(( - ev["event_id"], ev["room_id"], "content.topic", - ev["content"]["topic"] + ev["event_id"], ev["room_id"], sender, "content.topic", topic )) if rows: logger.info(rows) cur.executemany( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)", + "INSERT INTO event_search (event_id, room_id, sender, key, value)" + " VALUES (?,?,?,?,?)", rows ) -- cgit 1.4.1 From 892e70ec8404865b4b1e6cf4eef7a3ada7114149 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 28 Oct 2015 16:06:57 +0000 Subject: Add APIs for adding and removing tags from rooms --- synapse/rest/client/v2_alpha/__init__.py | 2 + synapse/rest/client/v2_alpha/tags.py | 89 +++++++++++++++ synapse/storage/__init__.py | 2 + synapse/storage/schema/delta/25/tags.sql | 37 ++++++ synapse/storage/tags.py | 190 +++++++++++++++++++++++++++++++ 5 files changed, 320 insertions(+) create mode 100644 synapse/rest/client/v2_alpha/tags.py create mode 100644 synapse/storage/schema/delta/25/tags.sql create mode 100644 synapse/storage/tags.py (limited to 'synapse/storage/schema') diff --git a/synapse/rest/client/v2_alpha/__init__.py b/synapse/rest/client/v2_alpha/__init__.py index 5831ff0e62..a108132346 100644 --- a/synapse/rest/client/v2_alpha/__init__.py +++ 
b/synapse/rest/client/v2_alpha/__init__.py @@ -22,6 +22,7 @@ from . import ( receipts, keys, tokenrefresh, + tags, ) from synapse.http.server import JsonResource @@ -44,3 +45,4 @@ class ClientV2AlphaRestResource(JsonResource): receipts.register_servlets(hs, client_resource) keys.register_servlets(hs, client_resource) tokenrefresh.register_servlets(hs, client_resource) + tags.register_servlets(hs, client_resource) diff --git a/synapse/rest/client/v2_alpha/tags.py b/synapse/rest/client/v2_alpha/tags.py new file mode 100644 index 0000000000..c4a670c5a7 --- /dev/null +++ b/synapse/rest/client/v2_alpha/tags.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ._base import client_v2_pattern + +from synapse.http.servlet import RestServlet + +from twisted.internet import defer + +import logging + +logger = logging.getLogger(__name__) + + +class TagListServlet(RestServlet): + """ + GET /user/{user_id}/rooms/{room_id}/tags HTTP/1.1 + """ + PATTERN = client_v2_pattern( + "/user/(?P[^/]*)/rooms/(?P[^/]*)/tags" + ) + + def __init__(self, hs): + super(TagListServlet, self).__init__() + self.auth = hs.get_auth() + self.store = hs.get_datastore() + + @defer.inlineCallbacks + def on_GET(self, request, user_id, room_id): + auth_user, _ = yield self.auth.get_user_by_req(request) + if user_id != auth_user.to_string(): + raise AuthError(403, "Cannot get tags for other users.") + + tags = yield self.store.get_tags_for_room(user_id, room_id) + + defer.returnValue((200, {"tags": tags})) + + +class TagServlet(RestServlet): + """ + PUT /user/{user_id}/rooms/{room_id}/tags/{tag} HTTP/1.1 + DELETE /user/{user_id}/rooms/{room_id}/tags/{tag} HTTP/1.1 + """ + PATTERN = client_v2_pattern( + "/user/(?P[^/]*)/rooms/(?P[^/]*)/tags/(?P[^/]*)" + ) + def __init__(self, hs): + super(TagServlet, self).__init__() + self.auth = hs.get_auth() + self.store = hs.get_datastore() + + @defer.inlineCallbacks + def on_PUT(self, request, user_id, room_id, tag): + auth_user, _ = yield self.auth.get_user_by_req(request) + if user_id != auth_user.to_string(): + raise AuthError(403, "Cannot add tags for other users.") + + yield self.store.add_tag_to_room(user_id, room_id, tag) + + # TODO: poke the notifier. + defer.returnValue((200, {})) + + @defer.inlineCallbacks + def on_DELETE(self, request, user_id, room_id, tag): + auth_user, _ = yield self.auth.get_user_by_req(request) + if user_id != auth_user.to_string(): + raise AuthError(403, "Cannot add tags for other users.") + + yield self.store.remove_tag_from_room(user_id, room_id, tag) + + # TODO: poke the notifier. 
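
[Editor's note] For reference, a client drives these servlets with ordinary JSON over HTTP. A hedged sketch using the requests library, where the /_matrix/client/v2_alpha prefix, the access_token query parameter, and the pre-encoded IDs are assumptions about the surrounding stack rather than anything this diff shows:

    import requests  # any HTTP client works; requests is just convenient

    base = "http://localhost:8008/_matrix/client/v2_alpha"  # assumed prefix
    user = "%40alice%3Aexample.com"   # @alice:example.com, percent-encoded
    room = "%21abc123%3Aexample.com"  # !abc123:example.com, percent-encoded
    auth = {"access_token": "ACCESS_TOKEN"}

    # Tag the room, list the tags, then remove the tag again. At this commit
    # the PUT body is ignored; tag content arrives in a later commit below.
    requests.put("%s/user/%s/rooms/%s/tags/u.work" % (base, user, room),
                 params=auth, data="{}")
    resp = requests.get("%s/user/%s/rooms/%s/tags" % (base, user, room),
                        params=auth)
    print(resp.json())  # {"tags": ["u.work"]} at this point in the history
    requests.delete("%s/user/%s/rooms/%s/tags/u.work" % (base, user, room),
                    params=auth)
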
+ defer.returnValue((200, {})) + + +def register_servlets(hs, http_server): + TagListServlet(hs).register(http_server) + TagServlet(hs).register(http_server) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index a1bd9c4ce9..e7443f2838 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -41,6 +41,7 @@ from .end_to_end_keys import EndToEndKeyStore from .receipts import ReceiptsStore from .search import SearchStore +from .tags import TagsStore import logging @@ -71,6 +72,7 @@ class DataStore(RoomMemberStore, RoomStore, ReceiptsStore, EndToEndKeyStore, SearchStore, + TagsStore, ): def __init__(self, hs): diff --git a/synapse/storage/schema/delta/25/tags.sql b/synapse/storage/schema/delta/25/tags.sql new file mode 100644 index 0000000000..168766dcf3 --- /dev/null +++ b/synapse/storage/schema/delta/25/tags.sql @@ -0,0 +1,37 @@ +/* Copyright 2015 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +CREATE TABLE IF NOT EXISTS room_tags( + user_id TEXT NOT NULL, + room_id TEXT NOT NULL, + tag TEXT NOT NULL, -- The name of the tag. + CONSTRAINT room_tag_uniqueness UNIQUE (user_id, room_id, tag) +); + +CREATE TABLE IF NOT EXISTS room_tags_revisions ( + user_id TEXT NOT NULL, + room_id TEXT NOT NULL, + stream_id BIGINT NOT NULL, -- The current version of the room tags. + CONSTRAINT room_tag_revisions_uniqueness UNIQUE (user_id, room_id) +); + +CREATE TABLE IF NOT EXISTS private_user_data_max_stream_id( + Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row. + stream_id BIGINT NOT NULL, + CHECK (Lock='X') +); + +INSERT INTO private_user_data_max_stream_id (stream_id) VALUES (0); diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py new file mode 100644 index 0000000000..507e60596f --- /dev/null +++ b/synapse/storage/tags.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- +# Copyright 2014, 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ._base import SQLBaseStore +from synapse.util.caches.descriptors import cached +from twisted.internet import defer +from .util.id_generators import StreamIdGenerator + +import logging + +logger = logging.getLogger(__name__) + + +class TagsStore(SQLBaseStore): + def __init__(self, hs): + super(TagsStore, self).__init__(hs) + + self._private_user_data_id_gen = StreamIdGenerator( + "private_user_data_max_stream_id", "stream_id" + ) + + @cached() + def get_tags_for_user(self, user_id): + """Get all the tags for a user. 
+
+
+        Args:
+            user_id(str): The user to get the tags for.
+        Returns:
+            A deferred dict mapping from room_id strings to lists of tag
+            strings.
+        """
+
+        deferred = self._simple_select_list(
+            "room_tags", {"user_id": user_id}, ["room_id", "tag"]
+        )
+
+        @deferred.addCallback
+        def tags_by_room(rows):
+            tags_by_room = {}
+            for row in rows:
+                tags_by_room.setdefault(row["room_id"], []).append(row["tag"])
+            return tags_by_room
+
+        return deferred
+
+    @defer.inlineCallbacks
+    def get_updated_tags(self, user_id, stream_id):
+        """Get all the tags for the rooms where the tags have changed since the
+        given version
+
+        Args:
+            user_id(str): The user to get the tags for.
+            stream_id(int): The earliest update to get for the user.
+        Returns:
+            A deferred dict mapping from room_id strings to lists of tag
+            strings for all the rooms that changed since the stream_id token.
+        """
+        def get_updated_tags_txn(txn):
+            sql = (
+                "SELECT room_id from room_tags_revisions"
+                " WHERE user_id = ? AND stream_id > ?"
+            )
+            txn.execute(sql, (user_id, stream_id))
+            room_ids = [row[0] for row in txn.fetchall()]
+            return room_ids
+
+        room_ids = yield self.runInteraction(
+            "get_updated_tags", get_updated_tags_txn
+        )
+
+        results = {}
+        if room_ids:
+            tags_by_room = yield self.get_tags_for_user(user_id)
+            for room_id in room_ids:
+                results[room_id] = tags_by_room[room_id]
+
+        defer.returnValue(results)
+
+    def get_tags_for_room(self, user_id, room_id):
+        """Get all the tags for the given room
+        Args:
+            user_id(str): The user to get tags for
+            room_id(str): The room to get tags for
+        Returns:
+            A deferred list of string tags.
+        """
+        return self._simple_select_onecol(
+            table="room_tags",
+            keyvalues={"user_id": user_id, "room_id": room_id},
+            retcol="tag",
+            desc="get_tags_for_room",
+        )
+
+    @defer.inlineCallbacks
+    def add_tag_to_room(self, user_id, room_id, tag):
+        """Add a tag to a room for a user.
+        Returns:
+            A deferred that completes once the tag has been added.
+        """
+        def add_tag_txn(txn, next_id):
+            sql = (
+                "INSERT INTO room_tags (user_id, room_id, tag)"
+                " VALUES (?, ?, ?)"
+            )
+            try:
+                txn.execute(sql, (user_id, room_id, tag))
+            except self.database_engine.module.IntegrityError:
+                # Return early if the row is already in the table
+                # and we don't need to bump the revision number of the
+                # private_user_data.
+                return
+            self._update_revision_txn(txn, user_id, room_id, next_id)
+
+        with (yield self._private_user_data_id_gen.get_next(self)) as next_id:
+            yield self.runInteraction("add_tag", add_tag_txn, next_id)
+
+        self.get_tags_for_user.invalidate((user_id,))
+
+    @defer.inlineCallbacks
+    def remove_tag_from_room(self, user_id, room_id, tag):
+        """Remove a tag from a room for a user.
+        Returns:
+            A deferred that completes once the tag has been removed
+        """
+        def remove_tag_txn(txn, next_id):
+            sql = (
+                "DELETE FROM room_tags "
+                " WHERE user_id = ? AND room_id = ? AND tag = ?"
+            )
+            txn.execute(sql, (user_id, room_id, tag))
+            self._update_revision_txn(txn, user_id, room_id, next_id)
+
+        with (yield self._private_user_data_id_gen.get_next(self)) as next_id:
+            yield self.runInteraction("remove_tag", remove_tag_txn, next_id)
+
+        self.get_tags_for_user.invalidate((user_id,))
+
+    def _update_revision_txn(self, txn, user_id, room_id, next_id):
+        """Update the latest revision of the tags for the given user and room.
+
+        Args:
+            txn: The database cursor
+            user_id(str): The ID of the user.
+            room_id(str): The ID of the room.
+            next_id(int): The revision to advance to.
+        """
+
+        update_max_id_sql = (
+            "UPDATE private_user_data_max_stream_id"
+            " SET stream_id = ?"
+            " WHERE stream_id < ?"
+        )
+        txn.execute(update_max_id_sql, (next_id, next_id))
+
+        update_sql = (
+            "UPDATE room_tags_revisions"
+            " SET stream_id = ?"
+            " WHERE user_id = ?"
+            " AND room_id = ?"
+        )
+        txn.execute(update_sql, (next_id, user_id, room_id))
+
+        if txn.rowcount == 0:
+            insert_sql = (
+                "INSERT INTO room_tags_revisions (user_id, room_id, stream_id)"
+                " VALUES (?, ?, ?)"
+            )
+            try:
+                txn.execute(insert_sql, (user_id, room_id, next_id))
+            except self.database_engine.module.IntegrityError:
+                # Ignore insertion errors. It doesn't matter if the row wasn't
+                # inserted because if two updates happened concurrently the one
+                # with the higher stream_id will not be reported to a client
+                # unless the previous update has completed. It doesn't matter
+                # which stream_id ends up in the table, as long as it is higher
+                # than the id that the client has.
+                pass
-- cgit 1.4.1


From 621e84d9a0f06f65bd51171079fd18eb916ab81a Mon Sep 17 00:00:00 2001
From: Daniel Wagner-Hall
Date: Fri, 30 Oct 2015 16:25:53 +0000
Subject: Add missing column

---
 synapse/storage/schema/delta/25/fts.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'synapse/storage/schema')

diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py
index 056487af30..b7cd0ce3b8 100644
--- a/synapse/storage/schema/delta/25/fts.py
+++ b/synapse/storage/schema/delta/25/fts.py
@@ -55,7 +55,7 @@ CREATE INDEX event_search_ev_ridx ON event_search(room_id);

 SQLITE_TABLE = (
     "CREATE VIRTUAL TABLE IF NOT EXISTS event_search"
-    " USING fts4 ( event_id, room_id, key, value )"
+    " USING fts4 ( event_id, room_id, sender, key, value )"
 )
-- cgit 1.4.1


From ddd8566f415ab3a6092aa4947e5d2aebf67109fc Mon Sep 17 00:00:00 2001
From: Mark Haines
Date: Mon, 2 Nov 2015 15:11:31 +0000
Subject: Store room tag content and return the content in the m.tag event

---
 synapse/handlers/message.py              |  6 ++---
 synapse/rest/client/v2_alpha/tags.py     | 12 +++++++--
 synapse/storage/schema/delta/25/tags.sql |  1 +
 synapse/storage/tags.py                  | 44 ++++++++++++++++++++------------
 4 files changed, 41 insertions(+), 22 deletions(-)

(limited to 'synapse/storage/schema')

diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 8f156e5c84..0f947993d1 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -405,7 +405,6 @@ class MessageHandler(BaseHandler):
             tags = tags_by_room.get(event.room_id)
             if tags:
                 private_user_data.append({
-                    "room_id": event.room_id,
                     "type": "m.tag",
                     "content": {"tags": tags},
                 })
@@ -466,7 +465,6 @@ class MessageHandler(BaseHandler):
             private_user_data.append({
                 "type": "m.tag",
                 "content": {"tags": tags},
-                "room_id": room_id,
             })

         result["private_user_data"] = private_user_data
@@ -499,8 +497,8 @@ class MessageHandler(BaseHandler):
             user_id, messages
         )

-        start_token = StreamToken(token[0], 0, 0, 0)
-        end_token = StreamToken(token[1], 0, 0, 0)
+        start_token = StreamToken(token[0], 0, 0, 0, 0)
+        end_token = StreamToken(token[1], 0, 0, 0, 0)

         time_now = self.clock.time_msec()

diff --git a/synapse/rest/client/v2_alpha/tags.py b/synapse/rest/client/v2_alpha/tags.py
index 486add9909..4e3f917fc5 100644
--- a/synapse/rest/client/v2_alpha/tags.py
+++ b/synapse/rest/client/v2_alpha/tags.py
@@ -16,12 +16,14 @@
 from ._base import client_v2_pattern

 from synapse.http.servlet import RestServlet
-from synapse.api.errors import AuthError
+from synapse.api.errors import AuthError, SynapseError
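
[Editor's note] The UPDATE ... WHERE stream_id < ? above works because private_user_data_max_stream_id is forced to be a single-row table: Lock is pinned to 'X' by both the UNIQUE constraint and the CHECK, so the row can only ever be updated, never duplicated. A simplified sketch of advancing such a counter; Synapse wraps this in StreamIdGenerator:

    def advance_stream_id(txn):
        """Bump the single-row counter and return the new position."""
        txn.execute("SELECT stream_id FROM private_user_data_max_stream_id")
        (current,) = txn.fetchone()
        next_id = current + 1
        # The WHERE clause means a concurrent bump to a higher value wins.
        txn.execute(
            "UPDATE private_user_data_max_stream_id"
            " SET stream_id = ? WHERE stream_id < ?",
            (next_id, next_id),
        )
        return next_id
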
from twisted.internet import defer import logging +import simplejson as json + logger = logging.getLogger(__name__) @@ -70,7 +72,13 @@ class TagServlet(RestServlet): if user_id != auth_user.to_string(): raise AuthError(403, "Cannot add tags for other users.") - max_id = yield self.store.add_tag_to_room(user_id, room_id, tag) + try: + content_bytes = request.content.read() + body = json.loads(content_bytes) + except: + raise SynapseError(400, "Invalid tag JSON") + + max_id = yield self.store.add_tag_to_room(user_id, room_id, tag, body) yield self.notifier.on_new_event( "private_user_data_key", max_id, users=[user_id] diff --git a/synapse/storage/schema/delta/25/tags.sql b/synapse/storage/schema/delta/25/tags.sql index 168766dcf3..527424c998 100644 --- a/synapse/storage/schema/delta/25/tags.sql +++ b/synapse/storage/schema/delta/25/tags.sql @@ -18,6 +18,7 @@ CREATE TABLE IF NOT EXISTS room_tags( user_id TEXT NOT NULL, room_id TEXT NOT NULL, tag TEXT NOT NULL, -- The name of the tag. + content TEXT NOT NULL, -- The JSON content of the tag. CONSTRAINT room_tag_uniqueness UNIQUE (user_id, room_id, tag) ); diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index 2d5c49144a..34aa38c06a 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -18,6 +18,7 @@ from synapse.util.caches.descriptors import cached from twisted.internet import defer from .util.id_generators import StreamIdGenerator +import ujson as json import logging logger = logging.getLogger(__name__) @@ -52,14 +53,15 @@ class TagsStore(SQLBaseStore): """ deferred = self._simple_select_list( - "room_tags", {"user_id": user_id}, ["room_id", "tag"] + "room_tags", {"user_id": user_id}, ["room_id", "tag", "content"] ) @deferred.addCallback def tags_by_room(rows): tags_by_room = {} for row in rows: - tags_by_room.setdefault(row["room_id"], []).append(row["tag"]) + room_tags = tags_by_room.setdefault(row["room_id"], {}) + room_tags[row["tag"]] = json.loads(row["content"]) return tags_by_room return deferred @@ -105,31 +107,41 @@ class TagsStore(SQLBaseStore): Returns: A deferred list of string tags. """ - return self._simple_select_onecol( + return self._simple_select_list( table="room_tags", keyvalues={"user_id": user_id, "room_id": room_id}, - retcol="tag", + retcols=("tag", "content"), desc="get_tags_for_room", - ) + ).addCallback(lambda rows: { + row["tag"]: json.loads(row["content"]) for row in rows + }) @defer.inlineCallbacks - def add_tag_to_room(self, user_id, room_id, tag): + def add_tag_to_room(self, user_id, room_id, tag, content): """Add a tag to a room for a user. + Args: + user_id(str): The user to add a tag for. + room_id(str): The room to add a tag for. + tag(str): The tag name to add. + content(dict): A json object to associate with the tag. Returns: A deferred that completes once the tag has been added. """ + content_json = json.dumps(content) + def add_tag_txn(txn, next_id): - sql = ( - "INSERT INTO room_tags (user_id, room_id, tag)" - " VALUES (?, ?, ?)" + self._simple_upsert_txn( + txn, + table="room_tags", + keyvalues={ + "user_id": user_id, + "room_id": room_id, + "tag": tag, + }, + values={ + "content": content_json, + } ) - try: - txn.execute(sql, (user_id, room_id, tag)) - except self.database_engine.module.IntegrityError: - # Return early if the row is already in the table - # and we don't need to bump the revision number of the - # private_user_data. 
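
[Editor's note] Replacing the bare INSERT in the diff below with _simple_upsert_txn makes re-tagging idempotent: an existing (user_id, room_id, tag) row gets its content overwritten instead of tripping the unique constraint. A generic sketch of the UPDATE-then-INSERT shape such a helper typically has; this is illustrative, not Synapse's exact helper:

    def simple_upsert_txn(txn, table, keyvalues, values):
        """Try an UPDATE first; fall back to INSERT if no row matched."""
        where = " AND ".join("%s = ?" % k for k in keyvalues)
        sets = ", ".join("%s = ?" % k for k in values)
        txn.execute(
            "UPDATE %s SET %s WHERE %s" % (table, sets, where),
            list(values.values()) + list(keyvalues.values()),
        )
        if txn.rowcount == 0:
            cols = list(keyvalues) + list(values)
            merged = dict(keyvalues, **values)
            txn.execute(
                "INSERT INTO %s (%s) VALUES (%s)" % (
                    table, ", ".join(cols), ", ".join("?" for _ in cols)
                ),
                [merged[c] for c in cols],
            )
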
- return self._update_revision_txn(txn, user_id, room_id, next_id) with (yield self._private_user_data_id_gen.get_next(self)) as next_id: -- cgit 1.4.1 From ca2f90742d5606f8fc5b7ddd3dd7244c377c1df8 Mon Sep 17 00:00:00 2001 From: Daniel Wagner-Hall Date: Thu, 5 Nov 2015 14:32:26 +0000 Subject: Open up /events to anonymous users for room events only Squash-merge of PR #345 from daniel/anonymousevents --- synapse/handlers/_base.py | 7 ++- synapse/handlers/events.py | 10 ++- synapse/handlers/message.py | 47 ++++++++++---- synapse/handlers/presence.py | 4 +- synapse/handlers/private_user_data.py | 2 +- synapse/handlers/receipts.py | 6 +- synapse/handlers/room.py | 11 +++- synapse/handlers/sync.py | 20 +++++- synapse/handlers/typing.py | 11 +--- synapse/notifier.py | 42 ++++++++++--- synapse/rest/client/v1/events.py | 13 +++- synapse/rest/client/v1/room.py | 6 +- synapse/storage/events.py | 2 + synapse/storage/room.py | 13 ++++ .../storage/schema/delta/25/history_visibility.sql | 26 ++++++++ synapse/storage/stream.py | 46 +++++++++++--- tests/handlers/test_presence.py | 71 ++++++++++++++++------ tests/handlers/test_typing.py | 30 +++++++-- tests/rest/client/v1/test_presence.py | 9 ++- tests/rest/client/v1/test_typing.py | 5 +- 20 files changed, 299 insertions(+), 82 deletions(-) create mode 100644 synapse/storage/schema/delta/25/history_visibility.sql (limited to 'synapse/storage/schema') diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index 6873a4575d..a9e43052b7 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -47,7 +47,8 @@ class BaseHandler(object): self.event_builder_factory = hs.get_event_builder_factory() @defer.inlineCallbacks - def _filter_events_for_client(self, user_id, events, is_guest=False): + def _filter_events_for_client(self, user_id, events, is_guest=False, + require_all_visible_for_guests=True): # Assumes that user has at some point joined the room if not is_guest. def allowed(event, membership, visibility): @@ -100,7 +101,9 @@ class BaseHandler(object): if should_include: events_to_return.append(event) - if is_guest and len(events_to_return) < len(events): + if (require_all_visible_for_guests + and is_guest + and len(events_to_return) < len(events)): # This indicates that some events in the requested range were not # visible to guest users. To be safe, we reject the entire request, # so that we don't have to worry about interpreting visibility diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py index 53c8ca3a26..0e4c0d4d06 100644 --- a/synapse/handlers/events.py +++ b/synapse/handlers/events.py @@ -100,7 +100,7 @@ class EventStreamHandler(BaseHandler): @log_function def get_stream(self, auth_user_id, pagin_config, timeout=0, as_client_event=True, affect_presence=True, - only_room_events=False): + only_room_events=False, room_id=None, is_guest=False): """Fetches the events stream for a given user. If `only_room_events` is `True` only room events will be returned. @@ -119,9 +119,15 @@ class EventStreamHandler(BaseHandler): # thundering herds on restart. 
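
[Editor's note] The new require_all_visible_for_guests flag in the commit below relaxes the previous all-or-nothing rule: the event stream may silently drop what a guest cannot see, while pagination keeps rejecting partially visible windows. A condensed sketch of that decision, with a hypothetical visible predicate standing in for the real membership and history_visibility checks:

    from synapse.api.errors import AuthError

    def filter_for_client(events, visible, is_guest,
                          require_all_visible_for_guests=True):
        """Keep the events a user may see; optionally hard-fail for guests.

        `visible` is a stand-in predicate mapping an event to a bool.
        """
        allowed = [ev for ev in events if visible(ev)]
        if (require_all_visible_for_guests
                and is_guest
                and len(allowed) < len(events)):
            # Rejecting the whole window avoids handing a guest a range
            # with unexplained holes in it.
            raise AuthError(403, "Guest access not allowed")
        return allowed
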
timeout = random.randint(int(timeout*0.9), int(timeout*1.1)) + if is_guest: + yield self.distributor.fire( + "user_joined_room", user=auth_user, room_id=room_id + ) + events, tokens = yield self.notifier.get_events_for( auth_user, pagin_config, timeout, - only_room_events=only_room_events + only_room_events=only_room_events, + is_guest=is_guest, guest_room_id=room_id ) time_now = self.clock.time_msec() diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 687e1527f7..654ecd2b37 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -16,7 +16,7 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, Membership -from synapse.api.errors import SynapseError +from synapse.api.errors import SynapseError, AuthError, Codes from synapse.streams.config import PaginationConfig from synapse.events.utils import serialize_event from synapse.events.validator import EventValidator @@ -229,7 +229,7 @@ class MessageHandler(BaseHandler): @defer.inlineCallbacks def get_room_data(self, user_id=None, room_id=None, - event_type=None, state_key=""): + event_type=None, state_key="", is_guest=False): """ Get data from a room. Args: @@ -239,23 +239,42 @@ class MessageHandler(BaseHandler): Raises: SynapseError if something went wrong. """ - member_event = yield self.auth.check_user_was_in_room(room_id, user_id) + membership, membership_event_id = yield self._check_in_room_or_world_readable( + room_id, user_id, is_guest + ) - if member_event.membership == Membership.JOIN: + if membership == Membership.JOIN: data = yield self.state_handler.get_current_state( room_id, event_type, state_key ) - elif member_event.membership == Membership.LEAVE: + elif membership == Membership.LEAVE: key = (event_type, state_key) room_state = yield self.store.get_state_for_events( - [member_event.event_id], [key] + [membership_event_id], [key] ) - data = room_state[member_event.event_id].get(key) + data = room_state[membership_event_id].get(key) defer.returnValue(data) @defer.inlineCallbacks - def get_state_events(self, user_id, room_id): + def _check_in_room_or_world_readable(self, room_id, user_id, is_guest): + if is_guest: + visibility = yield self.state_handler.get_current_state( + room_id, EventTypes.RoomHistoryVisibility, "" + ) + if visibility.content["history_visibility"] == "world_readable": + defer.returnValue((Membership.JOIN, None)) + return + else: + raise AuthError( + 403, "Guest access not allowed", errcode=Codes.GUEST_ACCESS_FORBIDDEN + ) + else: + member_event = yield self.auth.check_user_was_in_room(room_id, user_id) + defer.returnValue((member_event.membership, member_event.event_id)) + + @defer.inlineCallbacks + def get_state_events(self, user_id, room_id, is_guest=False): """Retrieve all state events for a given room. If the user is joined to the room then return the current state. If the user has left the room return the state events from when they left. @@ -266,15 +285,17 @@ class MessageHandler(BaseHandler): Returns: A list of dicts representing state events. 
[{}, {}, {}] """ - member_event = yield self.auth.check_user_was_in_room(room_id, user_id) + membership, membership_event_id = yield self._check_in_room_or_world_readable( + room_id, user_id, is_guest + ) - if member_event.membership == Membership.JOIN: + if membership == Membership.JOIN: room_state = yield self.state_handler.get_current_state(room_id) - elif member_event.membership == Membership.LEAVE: + elif membership == Membership.LEAVE: room_state = yield self.store.get_state_for_events( - [member_event.event_id], None + [membership_event_id], None ) - room_state = room_state[member_event.event_id] + room_state = room_state[membership_event_id] now = self.clock.time_msec() defer.returnValue( diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index ce60642127..0b780cd528 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -1142,8 +1142,9 @@ class PresenceEventSource(object): @defer.inlineCallbacks @log_function - def get_new_events_for_user(self, user, from_key, limit): + def get_new_events(self, user, from_key, room_ids=None, **kwargs): from_key = int(from_key) + room_ids = room_ids or [] presence = self.hs.get_handlers().presence_handler cachemap = presence._user_cachemap @@ -1161,7 +1162,6 @@ class PresenceEventSource(object): user_ids_to_check |= set( UserID.from_string(p["observed_user_id"]) for p in presence_list ) - room_ids = yield presence.get_joined_rooms_for_user(user) for room_id in set(room_ids) & set(presence._room_serials): if presence._room_serials[room_id] > from_key: joined = yield presence.get_joined_users_for_room_id(room_id) diff --git a/synapse/handlers/private_user_data.py b/synapse/handlers/private_user_data.py index 1778c71325..1abe45ed7b 100644 --- a/synapse/handlers/private_user_data.py +++ b/synapse/handlers/private_user_data.py @@ -24,7 +24,7 @@ class PrivateUserDataEventSource(object): return self.store.get_max_private_user_data_stream_id() @defer.inlineCallbacks - def get_new_events_for_user(self, user, from_key, limit): + def get_new_events(self, user, from_key, **kwargs): user_id = user.to_string() last_stream_id = from_key diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index a47ae3df42..973f4d5cae 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -164,17 +164,15 @@ class ReceiptEventSource(object): self.store = hs.get_datastore() @defer.inlineCallbacks - def get_new_events_for_user(self, user, from_key, limit): + def get_new_events(self, from_key, room_ids, **kwargs): from_key = int(from_key) to_key = yield self.get_current_key() if from_key == to_key: defer.returnValue(([], to_key)) - rooms = yield self.store.get_rooms_for_user(user.to_string()) - rooms = [room.room_id for room in rooms] events = yield self.store.get_linearized_receipts_for_rooms( - rooms, + room_ids, from_key=from_key, to_key=to_key, ) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 9184dcd048..736ffe9066 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -807,7 +807,14 @@ class RoomEventSource(object): self.store = hs.get_datastore() @defer.inlineCallbacks - def get_new_events_for_user(self, user, from_key, limit): + def get_new_events( + self, + user, + from_key, + limit, + room_ids, + is_guest, + ): # We just ignore the key for now. 
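
[Editor's note] These call-site changes all converge on one event-source contract: get_new_events receives the rooms to scope to and the guest flag instead of each source looking rooms up itself, and returns (events, new_key). A minimal conforming source, as an illustration of the contract only; the token arithmetic is toy and a positive limit is assumed:

    from twisted.internet import defer

    class ExampleEventSource(object):
        """Sketch of the event-source contract used by the notifier."""

        def __init__(self):
            self._events_by_key = {}  # stream key -> (room_id, event)

        def get_current_key(self):
            return max(self._events_by_key) if self._events_by_key else 0

        def get_new_events(self, user, from_key, limit, room_ids, is_guest):
            from_key = int(from_key)
            to_key = self.get_current_key()
            events = [
                ev
                for key, (room_id, ev) in sorted(self._events_by_key.items())
                if from_key < key <= to_key and room_id in room_ids
            ][:limit]
            # Every source returns (new_events, new_key) so the notifier
            # can advance the matching field of the StreamToken.
            return defer.succeed((events, to_key))
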
to_key = yield self.get_current_key() @@ -828,6 +835,8 @@ class RoomEventSource(object): from_key=from_key, to_key=to_key, limit=limit, + room_ids=room_ids, + is_guest=is_guest, ) defer.returnValue((events, end_key)) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 1c1ee34b1e..5294d96466 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -295,11 +295,16 @@ class SyncHandler(BaseHandler): typing_key = since_token.typing_key if since_token else "0" + rooms = yield self.store.get_rooms_for_user(sync_config.user.to_string()) + room_ids = [room.room_id for room in rooms] + typing_source = self.event_sources.sources["typing"] - typing, typing_key = yield typing_source.get_new_events_for_user( + typing, typing_key = yield typing_source.get_new_events( user=sync_config.user, from_key=typing_key, limit=sync_config.filter.ephemeral_limit(), + room_ids=room_ids, + is_guest=False, ) now_token = now_token.copy_and_replace("typing_key", typing_key) @@ -312,10 +317,13 @@ class SyncHandler(BaseHandler): receipt_key = since_token.receipt_key if since_token else "0" receipt_source = self.event_sources.sources["receipt"] - receipts, receipt_key = yield receipt_source.get_new_events_for_user( + receipts, receipt_key = yield receipt_source.get_new_events( user=sync_config.user, from_key=receipt_key, limit=sync_config.filter.ephemeral_limit(), + room_ids=room_ids, + # /sync doesn't support guest access, they can't get to this point in code + is_guest=False, ) now_token = now_token.copy_and_replace("receipt_key", receipt_key) @@ -360,11 +368,17 @@ class SyncHandler(BaseHandler): """ now_token = yield self.event_sources.get_current_token() + rooms = yield self.store.get_rooms_for_user(sync_config.user.to_string()) + room_ids = [room.room_id for room in rooms] + presence_source = self.event_sources.sources["presence"] - presence, presence_key = yield presence_source.get_new_events_for_user( + presence, presence_key = yield presence_source.get_new_events( user=sync_config.user, from_key=since_token.presence_key, limit=sync_config.filter.presence_limit(), + room_ids=room_ids, + # /sync doesn't support guest access, they can't get to this point in code + is_guest=False, ) now_token = now_token.copy_and_replace("presence_key", presence_key) diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index d7096aab8c..2846f3e6e8 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -246,17 +246,12 @@ class TypingNotificationEventSource(object): }, } - @defer.inlineCallbacks - def get_new_events_for_user(self, user, from_key, limit): + def get_new_events(self, from_key, room_ids, **kwargs): from_key = int(from_key) handler = self.handler() - joined_room_ids = ( - yield self.room_member_handler().get_joined_rooms_for_user(user) - ) - events = [] - for room_id in joined_room_ids: + for room_id in room_ids: if room_id not in handler._room_serials: continue if handler._room_serials[room_id] <= from_key: @@ -264,7 +259,7 @@ class TypingNotificationEventSource(object): events.append(self._make_event_for(room_id)) - defer.returnValue((events, handler._latest_room_serial)) + return events, handler._latest_room_serial def get_current_key(self): return self.handler()._latest_room_serial diff --git a/synapse/notifier.py b/synapse/notifier.py index b69da63d43..56c4c863b5 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -269,7 +269,7 @@ class Notifier(object): logger.exception("Failed to notify listener") @defer.inlineCallbacks - def 
wait_for_events(self, user, timeout, callback, + def wait_for_events(self, user, timeout, callback, room_ids=None, from_token=StreamToken("s0", "0", "0", "0", "0")): """Wait until the callback returns a non empty response or the timeout fires. @@ -279,11 +279,12 @@ class Notifier(object): if user_stream is None: appservice = yield self.store.get_app_service_by_user_id(user) current_token = yield self.event_sources.get_current_token() - rooms = yield self.store.get_rooms_for_user(user) - rooms = [room.room_id for room in rooms] + if room_ids is None: + rooms = yield self.store.get_rooms_for_user(user) + room_ids = [room.room_id for room in rooms] user_stream = _NotifierUserStream( user=user, - rooms=rooms, + rooms=room_ids, appservice=appservice, current_token=current_token, time_now_ms=self.clock.time_msec(), @@ -329,7 +330,8 @@ class Notifier(object): @defer.inlineCallbacks def get_events_for(self, user, pagination_config, timeout, - only_room_events=False): + only_room_events=False, + is_guest=False, guest_room_id=None): """ For the given user and rooms, return any new events for them. If there are no new events wait for up to `timeout` milliseconds for any new events to happen before returning. @@ -342,6 +344,16 @@ class Notifier(object): limit = pagination_config.limit + room_ids = [] + if is_guest: + # TODO(daniel): Deal with non-room events too + only_room_events = True + if guest_room_id: + room_ids = [guest_room_id] + else: + rooms = yield self.store.get_rooms_for_user(user.to_string()) + room_ids = [room.room_id for room in rooms] + @defer.inlineCallbacks def check_for_updates(before_token, after_token): if not after_token.is_after(before_token): @@ -357,9 +369,23 @@ class Notifier(object): continue if only_room_events and name != "room": continue - new_events, new_key = yield source.get_new_events_for_user( - user, getattr(from_token, keyname), limit, + new_events, new_key = yield source.get_new_events( + user=user, + from_key=getattr(from_token, keyname), + limit=limit, + is_guest=is_guest, + room_ids=room_ids, ) + + if is_guest: + room_member_handler = self.hs.get_handlers().room_member_handler + new_events = yield room_member_handler._filter_events_for_client( + user.to_string(), + new_events, + is_guest=is_guest, + require_all_visible_for_guests=False + ) + events.extend(new_events) end_token = end_token.copy_and_replace(keyname, new_key) @@ -369,7 +395,7 @@ class Notifier(object): defer.returnValue(None) result = yield self.wait_for_events( - user, timeout, check_for_updates, from_token=from_token + user, timeout, check_for_updates, room_ids=room_ids, from_token=from_token ) if result is None: diff --git a/synapse/rest/client/v1/events.py b/synapse/rest/client/v1/events.py index 4073b0d2d1..3e1750d1a1 100644 --- a/synapse/rest/client/v1/events.py +++ b/synapse/rest/client/v1/events.py @@ -34,7 +34,15 @@ class EventStreamRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request): - auth_user, _, _ = yield self.auth.get_user_by_req(request) + auth_user, _, is_guest = yield self.auth.get_user_by_req( + request, + allow_guest=True + ) + room_id = None + if is_guest: + if "room_id" not in request.args: + raise SynapseError(400, "Guest users must specify room_id param") + room_id = request.args["room_id"][0] try: handler = self.handlers.event_stream_handler pagin_config = PaginationConfig.from_request(request) @@ -49,7 +57,8 @@ class EventStreamRestServlet(ClientV1RestServlet): chunk = yield handler.get_stream( auth_user.to_string(), pagin_config, 
timeout=timeout, - as_client_event=as_client_event + as_client_event=as_client_event, affect_presence=(not is_guest), + room_id=room_id, is_guest=is_guest ) except: logger.exception("Event stream failed") diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 0876e593c5..afb802baec 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -125,7 +125,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request, room_id, event_type, state_key): - user, _, _ = yield self.auth.get_user_by_req(request) + user, _, is_guest = yield self.auth.get_user_by_req(request, allow_guest=True) msg_handler = self.handlers.message_handler data = yield msg_handler.get_room_data( @@ -133,6 +133,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): room_id=room_id, event_type=event_type, state_key=state_key, + is_guest=is_guest, ) if not data: @@ -348,12 +349,13 @@ class RoomStateRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request, room_id): - user, _, _ = yield self.auth.get_user_by_req(request) + user, _, is_guest = yield self.auth.get_user_by_req(request, allow_guest=True) handler = self.handlers.message_handler # Get all the current state for this room events = yield handler.get_state_events( room_id=room_id, user_id=user.to_string(), + is_guest=is_guest, ) defer.returnValue((200, events)) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index e6c1abfc27..59c9987202 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -311,6 +311,8 @@ class EventsStore(SQLBaseStore): self._store_room_message_txn(txn, event) elif event.type == EventTypes.Redaction: self._store_redaction(txn, event) + elif event.type == EventTypes.RoomHistoryVisibility: + self._store_history_visibility_txn(txn, event) self._store_room_members_txn( txn, diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 13441fcdce..1c79626736 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -202,6 +202,19 @@ class RoomStore(SQLBaseStore): txn, event, "content.body", event.content["body"] ) + def _store_history_visibility_txn(self, txn, event): + if hasattr(event, "content") and "history_visibility" in event.content: + sql = ( + "INSERT INTO history_visibility" + " (event_id, room_id, history_visibility)" + " VALUES (?, ?, ?)" + ) + txn.execute(sql, ( + event.event_id, + event.room_id, + event.content["history_visibility"] + )) + def _store_event_search_txn(self, txn, event, key, value): if isinstance(self.database_engine, PostgresEngine): sql = ( diff --git a/synapse/storage/schema/delta/25/history_visibility.sql b/synapse/storage/schema/delta/25/history_visibility.sql new file mode 100644 index 0000000000..9f387ed69f --- /dev/null +++ b/synapse/storage/schema/delta/25/history_visibility.sql @@ -0,0 +1,26 @@ +/* Copyright 2015 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
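
[Editor's note] Copying history_visibility out of event content into its own table at persist time, as _store_history_visibility_txn does above, is what lets plain SQL joins answer "is this room world_readable" without parsing event JSON. A sketch of the read side, shaped like the join the stream store gains below:

    def is_world_readable(txn, room_id):
        """Check a room's current history visibility via the side table."""
        txn.execute(
            "SELECT h.history_visibility FROM history_visibility AS h"
            " INNER JOIN current_state_events AS c ON h.event_id = c.event_id"
            " WHERE c.room_id = ?",
            (room_id,),
        )
        row = txn.fetchone()
        return bool(row) and row[0] == "world_readable"
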
+ */ + +/* + * This is a manual index of history_visibility content of state events, + * so that we can join on them in SELECT statements. + */ +CREATE TABLE IF NOT EXISTS history_visibility( + id INTEGER PRIMARY KEY, + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + history_visibility TEXT NOT NULL, + UNIQUE (event_id) +); diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index c728013f4c..be8ba76aae 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -158,13 +158,40 @@ class StreamStore(SQLBaseStore): defer.returnValue(results) @log_function - def get_room_events_stream(self, user_id, from_key, to_key, limit=0): - current_room_membership_sql = ( - "SELECT m.room_id FROM room_memberships as m " - " INNER JOIN current_state_events as c" - " ON m.event_id = c.event_id AND c.state_key = m.user_id" - " WHERE m.user_id = ? AND m.membership = 'join'" - ) + def get_room_events_stream( + self, + user_id, + from_key, + to_key, + limit=0, + is_guest=False, + room_ids=None + ): + room_ids = room_ids or [] + room_ids = [r for r in room_ids] + if is_guest: + current_room_membership_sql = ( + "SELECT c.room_id FROM history_visibility AS h" + " INNER JOIN current_state_events AS c" + " ON h.event_id = c.event_id" + " WHERE c.room_id IN (%s) AND h.history_visibility = 'world_readable'" % ( + ",".join(map(lambda _: "?", room_ids)) + ) + ) + current_room_membership_args = room_ids + else: + current_room_membership_sql = ( + "SELECT m.room_id FROM room_memberships as m " + " INNER JOIN current_state_events as c" + " ON m.event_id = c.event_id AND c.state_key = m.user_id" + " WHERE m.user_id = ? AND m.membership = 'join'" + ) + current_room_membership_args = [user_id] + if room_ids: + current_room_membership_sql += " AND m.room_id in (%s)" % ( + ",".join(map(lambda _: "?", room_ids)) + ) + current_room_membership_args = [user_id] + room_ids # We also want to get any membership events about that user, e.g. # invites or leave notifications. @@ -173,6 +200,7 @@ class StreamStore(SQLBaseStore): "INNER JOIN current_state_events as c ON m.event_id = c.event_id " "WHERE m.user_id = ? 
" ) + membership_args = [user_id] if limit: limit = max(limit, MAX_STREAM_SIZE) @@ -199,7 +227,9 @@ class StreamStore(SQLBaseStore): } def f(txn): - txn.execute(sql, (False, user_id, user_id, from_id.stream, to_id.stream,)) + args = ([False] + current_room_membership_args + membership_args + + [from_id.stream, to_id.stream]) + txn.execute(sql, args) rows = self.cursor_to_dict(txn) diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py index 29372d488a..10d4482cce 100644 --- a/tests/handlers/test_presence.py +++ b/tests/handlers/test_presence.py @@ -650,9 +650,30 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): {"presence": ONLINE} ) + # Apple sees self-reflection even without room_id + (events, _) = yield self.event_source.get_new_events( + user=self.u_apple, + from_key=0, + ) + + self.assertEquals(self.event_source.get_current_key(), 1) + self.assertEquals(events, + [ + {"type": "m.presence", + "content": { + "user_id": "@apple:test", + "presence": ONLINE, + "last_active_ago": 0, + }}, + ], + msg="Presence event should be visible to self-reflection" + ) + # Apple sees self-reflection - (events, _) = yield self.event_source.get_new_events_for_user( - self.u_apple, 0, None + (events, _) = yield self.event_source.get_new_events( + user=self.u_apple, + from_key=0, + room_ids=[self.room_id], ) self.assertEquals(self.event_source.get_current_key(), 1) @@ -684,8 +705,10 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): ) # Banana sees it because of presence subscription - (events, _) = yield self.event_source.get_new_events_for_user( - self.u_banana, 0, None + (events, _) = yield self.event_source.get_new_events( + user=self.u_banana, + from_key=0, + room_ids=[self.room_id], ) self.assertEquals(self.event_source.get_current_key(), 1) @@ -702,8 +725,10 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): ) # Elderberry sees it because of same room - (events, _) = yield self.event_source.get_new_events_for_user( - self.u_elderberry, 0, None + (events, _) = yield self.event_source.get_new_events( + user=self.u_elderberry, + from_key=0, + room_ids=[self.room_id], ) self.assertEquals(self.event_source.get_current_key(), 1) @@ -720,8 +745,10 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): ) # Durian is not in the room, should not see this event - (events, _) = yield self.event_source.get_new_events_for_user( - self.u_durian, 0, None + (events, _) = yield self.event_source.get_new_events( + user=self.u_durian, + from_key=0, + room_ids=[], ) self.assertEquals(self.event_source.get_current_key(), 1) @@ -767,8 +794,9 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): "accepted": True}, ], presence) - (events, _) = yield self.event_source.get_new_events_for_user( - self.u_apple, 1, None + (events, _) = yield self.event_source.get_new_events( + user=self.u_apple, + from_key=1, ) self.assertEquals(self.event_source.get_current_key(), 2) @@ -858,8 +886,10 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): ) ) - (events, _) = yield self.event_source.get_new_events_for_user( - self.u_apple, 0, None + (events, _) = yield self.event_source.get_new_events( + user=self.u_apple, + from_key=0, + room_ids=[self.room_id], ) self.assertEquals(self.event_source.get_current_key(), 1) @@ -905,8 +935,10 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): self.assertEquals(self.event_source.get_current_key(), 1) - (events, _) = yield self.event_source.get_new_events_for_user( - self.u_apple, 0, None + 
(events, _) = yield self.event_source.get_new_events( + user=self.u_apple, + from_key=0, + room_ids=[self.room_id,] ) self.assertEquals(events, [ @@ -932,8 +964,10 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): self.assertEquals(self.event_source.get_current_key(), 2) - (events, _) = yield self.event_source.get_new_events_for_user( - self.u_apple, 0, None + (events, _) = yield self.event_source.get_new_events( + user=self.u_apple, + from_key=0, + room_ids=[self.room_id,] ) self.assertEquals(events, [ @@ -966,8 +1000,9 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): self.room_members.append(self.u_clementine) - (events, _) = yield self.event_source.get_new_events_for_user( - self.u_apple, 0, None + (events, _) = yield self.event_source.get_new_events( + user=self.u_apple, + from_key=0, ) self.assertEquals(self.event_source.get_current_key(), 1) diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 41bb08b7ca..2d7ba43561 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -187,7 +187,10 @@ class TypingNotificationsTestCase(unittest.TestCase): ]) self.assertEquals(self.event_source.get_current_key(), 1) - events = yield self.event_source.get_new_events_for_user(self.u_apple, 0, None) + events = yield self.event_source.get_new_events( + room_ids=[self.room_id], + from_key=0, + ) self.assertEquals( events[0], [ @@ -250,7 +253,10 @@ class TypingNotificationsTestCase(unittest.TestCase): ]) self.assertEquals(self.event_source.get_current_key(), 1) - events = yield self.event_source.get_new_events_for_user(self.u_apple, 0, None) + events = yield self.event_source.get_new_events( + room_ids=[self.room_id], + from_key=0 + ) self.assertEquals( events[0], [ @@ -306,7 +312,10 @@ class TypingNotificationsTestCase(unittest.TestCase): yield put_json.await_calls() self.assertEquals(self.event_source.get_current_key(), 1) - events = yield self.event_source.get_new_events_for_user(self.u_apple, 0, None) + events = yield self.event_source.get_new_events( + room_ids=[self.room_id], + from_key=0, + ) self.assertEquals( events[0], [ @@ -337,7 +346,10 @@ class TypingNotificationsTestCase(unittest.TestCase): self.on_new_event.reset_mock() self.assertEquals(self.event_source.get_current_key(), 1) - events = yield self.event_source.get_new_events_for_user(self.u_apple, 0, None) + events = yield self.event_source.get_new_events( + room_ids=[self.room_id], + from_key=0, + ) self.assertEquals( events[0], [ @@ -356,7 +368,10 @@ class TypingNotificationsTestCase(unittest.TestCase): ]) self.assertEquals(self.event_source.get_current_key(), 2) - events = yield self.event_source.get_new_events_for_user(self.u_apple, 1, None) + events = yield self.event_source.get_new_events( + room_ids=[self.room_id], + from_key=1, + ) self.assertEquals( events[0], [ @@ -383,7 +398,10 @@ class TypingNotificationsTestCase(unittest.TestCase): self.on_new_event.reset_mock() self.assertEquals(self.event_source.get_current_key(), 3) - events = yield self.event_source.get_new_events_for_user(self.u_apple, 0, None) + events = yield self.event_source.get_new_events( + room_ids=[self.room_id], + from_key=0, + ) self.assertEquals( events[0], [ diff --git a/tests/rest/client/v1/test_presence.py b/tests/rest/client/v1/test_presence.py index 3e0f294630..7f29d73d95 100644 --- a/tests/rest/client/v1/test_presence.py +++ b/tests/rest/client/v1/test_presence.py @@ -47,7 +47,14 @@ class NullSource(object): def __init__(self, hs): pass - def get_new_events_for_user(self, 
user, from_key, limit):
+    def get_new_events(
+        self,
+        user,
+        from_key,
+        room_ids=None,
+        limit=None,
+        is_guest=None
+    ):
         return defer.succeed(([], from_key))
 
     def get_current_key(self, direction='f'):
diff --git a/tests/rest/client/v1/test_typing.py b/tests/rest/client/v1/test_typing.py
index 8433585616..61b9cc743b 100644
--- a/tests/rest/client/v1/test_typing.py
+++ b/tests/rest/client/v1/test_typing.py
@@ -116,7 +116,10 @@ class RoomTypingTestCase(RestTestCase):
         self.assertEquals(200, code)
 
         self.assertEquals(self.event_source.get_current_key(), 1)
-        events = yield self.event_source.get_new_events_for_user(self.user, 0, None)
+        events = yield self.event_source.get_new_events(
+            from_key=0,
+            room_ids=[self.room_id],
+        )
         self.assertEquals(
             events[0],
             [
-- 
cgit 1.4.1


From f2c4ee41b91f1828d516df871adcfbaed46d5407 Mon Sep 17 00:00:00 2001
From: Daniel Wagner-Hall
Date: Fri, 6 Nov 2015 14:27:49 +0000
Subject: Remove accidentally added ID column

---
 synapse/storage/schema/delta/25/history_visibility.sql | 1 -
 1 file changed, 1 deletion(-)

(limited to 'synapse/storage/schema')

diff --git a/synapse/storage/schema/delta/25/history_visibility.sql b/synapse/storage/schema/delta/25/history_visibility.sql
index 9f387ed69f..532cb05151 100644
--- a/synapse/storage/schema/delta/25/history_visibility.sql
+++ b/synapse/storage/schema/delta/25/history_visibility.sql
@@ -18,7 +18,6 @@
  * so that we can join on them in SELECT statements.
  */
 CREATE TABLE IF NOT EXISTS history_visibility(
-    id INTEGER PRIMARY KEY,
     event_id TEXT NOT NULL,
     room_id TEXT NOT NULL,
     history_visibility TEXT NOT NULL,
-- 
cgit 1.4.1


From c6a01f2ed0da9f0db169307e7d1649021f0c2123 Mon Sep 17 00:00:00 2001
From: Mark Haines
Date: Mon, 9 Nov 2015 14:37:28 +0000
Subject: Add storage module for tracking background updates.

The progress for each background update is stored as a JSON blob in the
database. Each background update is broken up into separate batches. The
batch size is automatically tuned to try to avoid blocking single-threaded
databases for too long.
---
 synapse/storage/background_updates.py              | 210 +++++++++++++++++++++
 .../schema/delta/25/00background_updates.sql       |  21 +++
 2 files changed, 231 insertions(+)
 create mode 100644 synapse/storage/background_updates.py
 create mode 100644 synapse/storage/schema/delta/25/00background_updates.sql

(limited to 'synapse/storage/schema')

diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
new file mode 100644
index 0000000000..32a233c213
--- /dev/null
+++ b/synapse/storage/background_updates.py
@@ -0,0 +1,210 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014, 2015 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
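+#
+# This module implements batched "background updates": the progress of each
+# update is persisted as a JSON blob in the background_updates table, and
+# batch sizes are tuned from the measured per-item duration so that a
+# single-threaded database is not blocked for too long at a time.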
+
+from ._base import SQLBaseStore
+
+from twisted.internet import defer
+
+import ujson as json
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class BackgroundUpdatePerformance(object):
+    """Tracks how long a background update is taking to update its items"""
+
+    def __init__(self, name):
+        self.name = name
+        self.total_item_count = 0
+        self.total_duration_ms = 0
+        self.avg_item_count = 0
+        self.avg_duration_ms = 0
+
+    def update(self, item_count, duration_ms):
+        """Update the stats after doing an update"""
+        self.total_item_count += item_count
+        self.total_duration_ms += duration_ms
+
+        # Exponential moving averages for the number of items updated and
+        # the duration.
+        self.avg_item_count += 0.1 * (item_count - self.avg_item_count)
+        self.avg_duration_ms += 0.1 * (duration_ms - self.avg_duration_ms)
+
+    def average_duration_ms_per_item(self):
+        """An estimate of how long it takes to do a single update.
+        Returns:
+            A duration in ms as a float
+        """
+        if self.total_item_count == 0:
+            return None
+        else:
+            # Use the exponential moving average so that we can adapt to
+            # changes in how long the update process takes.
+            return float(self.avg_duration_ms) / float(self.avg_item_count)
+
+
+class BackgroundUpdateStore(SQLBaseStore):
+    """ Background updates are updates to the database that run in the
+    background. Each update processes a batch of data at once. We attempt to
+    limit the impact of each update by monitoring how long each batch takes to
+    process and autotuning the batch size.
+    """
+
+    MINIMUM_BACKGROUND_BATCH_SIZE = 100
+    DEFAULT_BACKGROUND_BATCH_SIZE = 100
+
+    def __init__(self, hs):
+        super(BackgroundUpdateStore, self).__init__(hs)
+        self._background_update_performance = {}
+        self._background_update_queue = []
+        self._background_update_handlers = {}
+
+    @defer.inlineCallbacks
+    def do_background_update(self, desired_duration_ms):
+        """Does some amount of work on a background update
+        Args:
+            desired_duration_ms(float): How long we want to spend
+                updating.
+        Returns:
+            A deferred that completes once some amount of work is done.
+            The deferred will have a value of None if there is currently
+            no more work to do.
+        """
+        if not self._background_update_queue:
+            updates = yield self._simple_select_list(
+                "background_updates",
+                keyvalues=None,
+                retcols=("update_name",),
+            )
+            for update in updates:
+                self._background_update_queue.append(update['update_name'])
+
+        if not self._background_update_queue:
+            defer.returnValue(None)
+
+        update_name = self._background_update_queue.pop(0)
+        self._background_update_queue.append(update_name)
+
+        update_handler = self._background_update_handlers[update_name]
+
+        performance = self._background_update_performance.get(update_name)
+
+        if performance is None:
+            performance = BackgroundUpdatePerformance(update_name)
+            self._background_update_performance[update_name] = performance
+
+        duration_ms_per_item = performance.average_duration_ms_per_item()
+
+        if duration_ms_per_item is not None:
+            batch_size = int(desired_duration_ms / duration_ms_per_item)
+            # Clamp the batch size so that we always make progress
+            batch_size = max(batch_size, self.MINIMUM_BACKGROUND_BATCH_SIZE)
+        else:
+            batch_size = self.DEFAULT_BACKGROUND_BATCH_SIZE
+
+        progress_json = yield self._simple_select_one_onecol(
+            "background_updates",
+            keyvalues={"update_name": update_name},
+            retcol="progress_json"
+        )
+
+        progress = json.loads(progress_json)
+
+        time_start = self._clock.time_msec()
+        items_updated = yield update_handler(progress, batch_size)
+        time_stop = self._clock.time_msec()
+
+        duration_ms = time_stop - time_start
+
+        logger.info(
+            "Updating %. Updated %r items in %rms",
+            update_name, items_updated, duration_ms
+        )
+
+        performance.update(items_updated, duration_ms)
+
+        defer.returnValue(len(self._background_update_performance))
+
+    def register_background_update_handler(self, update_name, update_handler):
+        """Register a handler for doing a background update.
+
+        The handler should take two arguments:
+
+        * A dict of the current progress
+        * An integer count of the number of items to update in this batch.
+
+        The handler should return a deferred integer count of items updated.
+        The handler is responsible for updating the progress of the update.
+
+        Args:
+            update_name(str): The name of the update that this code handles.
+            update_handler(function): The function that does the update.
+        """
+        self._background_update_handlers[update_name] = update_handler
+
+    def start_background_update(self, update_name, progress):
+        """Starts a background update running.
+
+        Args:
+            update_name: The update to set running.
+            progress: The initial state of the progress of the update.
+
+        Returns:
+            A deferred that completes once the task has been added to the
+            queue.
+        """
+        # Clear the background update queue so that we will pick up the new
+        # task on the next iteration of do_background_update.
+        self._background_update_queue = []
+        progress_json = json.dumps(progress)
+
+        return self._simple_insert(
+            "background_updates",
+            {"update_name": update_name, "progress_json": progress_json}
+        )
+
+    def _end_background_update(self, update_name):
+        """Removes a completed background update task from the queue.
+
+        Args:
+            update_name(str): The name of the completed task to remove
+        Returns:
+            A deferred that completes once the task is removed.
+ """ + self._background_update_queue = [ + name for name in self._background_update_queue if name != update_name + ] + return self._simple_delete_one( + "background_updates", keyvalues={"update_name": update_name} + ) + + def _background_update_progress_txn(self, txn, update_name, progress): + """Update the progress of a background update + + Args: + txn(cursor): The transaction. + update_name(str): The name of the background update task + progress(dict): The progress of the update. + """ + + progress_json = json.dumps(progress) + + self._simple_update_one_txn( + txn, + "background_updates", + keyvalues={"update_name": update_name}, + updatevalues={"progress_json": progress_json}, + ) diff --git a/synapse/storage/schema/delta/25/00background_updates.sql b/synapse/storage/schema/delta/25/00background_updates.sql new file mode 100644 index 0000000000..41a9b59b1b --- /dev/null +++ b/synapse/storage/schema/delta/25/00background_updates.sql @@ -0,0 +1,21 @@ +/* Copyright 2015 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +CREATE TABLE IF NOT EXISTS background_updates( + update_name TEXT NOT NULL, -- The name of the background update. + progress_json TEXT NOT NULL, -- The current progress of the update as JSON. + CONSTRAINT background_updates_uniqueness UNIQUE (update_name) +); -- cgit 1.4.1 From 90b503216c40974dc4925a9bbb673779a039143c Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Tue, 10 Nov 2015 16:20:13 +0000 Subject: Use a background task to update databases to use the full text search --- synapse/storage/schema/delta/25/fts.py | 100 ++++++++------------------------- synapse/storage/search.py | 8 +-- 2 files changed, 28 insertions(+), 80 deletions(-) (limited to 'synapse/storage/schema') diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py index b7cd0ce3b8..e408d36da0 100644 --- a/synapse/storage/schema/delta/25/fts.py +++ b/synapse/storage/schema/delta/25/fts.py @@ -22,7 +22,7 @@ import ujson logger = logging.getLogger(__name__) -POSTGRES_SQL = """ +POSTGRES_TABLE = """ CREATE TABLE IF NOT EXISTS event_search ( event_id TEXT, room_id TEXT, @@ -31,22 +31,6 @@ CREATE TABLE IF NOT EXISTS event_search ( vector tsvector ); -INSERT INTO event_search SELECT - event_id, room_id, json::json->>'sender', 'content.body', - to_tsvector('english', json::json->'content'->>'body') - FROM events NATURAL JOIN event_json WHERE type = 'm.room.message'; - -INSERT INTO event_search SELECT - event_id, room_id, json::json->>'sender', 'content.name', - to_tsvector('english', json::json->'content'->>'name') - FROM events NATURAL JOIN event_json WHERE type = 'm.room.name'; - -INSERT INTO event_search SELECT - event_id, room_id, json::json->>'sender', 'content.topic', - to_tsvector('english', json::json->'content'->>'topic') - FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; - - CREATE INDEX event_search_fts_idx ON event_search USING gin(vector); CREATE INDEX event_search_ev_idx ON event_search(event_id); CREATE INDEX 
event_search_ev_ridx ON event_search(room_id); @@ -61,67 +45,31 @@ SQLITE_TABLE = ( def run_upgrade(cur, database_engine, *args, **kwargs): if isinstance(database_engine, PostgresEngine): - run_postgres_upgrade(cur) + for statement in get_statements(POSTGRES_TABLE.splitlines()): + cur.execute(statement) return if isinstance(database_engine, Sqlite3Engine): - run_sqlite_upgrade(cur) - return - - -def run_postgres_upgrade(cur): - for statement in get_statements(POSTGRES_SQL.splitlines()): - cur.execute(statement) - - -def run_sqlite_upgrade(cur): cur.execute(SQLITE_TABLE) + return - rowid = -1 - while True: - cur.execute( - "SELECT rowid, json FROM event_json" - " WHERE rowid > ?" - " ORDER BY rowid ASC LIMIT 100", - (rowid,) - ) - - res = cur.fetchall() - - if not res: - break - - events = [ - ujson.loads(js) - for _, js in res - ] - - rowid = max(rid for rid, _ in res) - - rows = [] - for ev in events: - content = ev.get("content", {}) - body = content.get("body", None) - name = content.get("name", None) - topic = content.get("topic", None) - sender = ev.get("sender", None) - if ev["type"] == "m.room.message" and body: - rows.append(( - ev["event_id"], ev["room_id"], sender, "content.body", body - )) - if ev["type"] == "m.room.name" and name: - rows.append(( - ev["event_id"], ev["room_id"], sender, "content.name", name - )) - if ev["type"] == "m.room.topic" and topic: - rows.append(( - ev["event_id"], ev["room_id"], sender, "content.topic", topic - )) - - if rows: - logger.info(rows) - cur.executemany( - "INSERT INTO event_search (event_id, room_id, sender, key, value)" - " VALUES (?,?,?,?,?)", - rows - ) + cur.execute("SELECT MIN(stream_ordering) FROM events") + rows = cur.fetchall() + min_stream_id = rows[0][0] + + cur.execute("SELECT MAX(stream_ordering) FROM events") + rows = cur.fetchall() + max_stream_id = rows[0][0] + + if min_stream_id is not None and max_stream_id is not None: + progress = { + "target_min_stream_id_inclusive": min_stream_id, + "max_stream_id_exclusive": max_stream_id + 1, + "rows_inserted": 0, + } + progress_json = ujson.dumps(progress) + + cur.execute( + "INSERT into background_updates (update_name, progress_json)" + " VALUES (?, ?)", ("event_search", progress_json) + ) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index d170c546b5..3b19b118eb 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -37,8 +37,8 @@ class SearchStore(BackgroundUpdateStore): @defer.inlineCallbacks def _background_reindex_search(self, progress, batch_size): - target_min_stream_id = progress["target_min_stream_id"] - max_stream_id = progress["max_stream_id"] + target_min_stream_id = progress["target_min_stream_id_inclusive"] + max_stream_id = progress["max_stream_id_exclusive"] rows_inserted = progress.get("rows_inserted", 0) INSERT_CLUMP_SIZE = 1000 @@ -105,8 +105,8 @@ class SearchStore(BackgroundUpdateStore): txn.execute_many(sql, clump) progress = { - "target_max_stream_id": target_min_stream_id, - "max_stream_id": min_stream_id, + "target_min_stream_id_inclusive": target_min_stream_id, + "max_stream_id_exclusive": min_stream_id, "rows_inserted": rows_inserted + len(event_search_rows) } -- cgit 1.4.1 From cf437900e0c689aad40f3da89866cf84c0f7ef65 Mon Sep 17 00:00:00 2001 From: Daniel Wagner-Hall Date: Tue, 10 Nov 2015 17:10:27 +0000 Subject: Return world_readable and guest_can_join in /publicRooms --- synapse/storage/events.py | 2 + synapse/storage/room.py | 71 ++++++++++++++---------- synapse/storage/schema/delta/25/guest_access.sql | 25 
+++++++++ tests/storage/test_room.py | 2 + 4 files changed, 71 insertions(+), 29 deletions(-) create mode 100644 synapse/storage/schema/delta/25/guest_access.sql (limited to 'synapse/storage/schema') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 59c9987202..4a365ff639 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -313,6 +313,8 @@ class EventsStore(SQLBaseStore): self._store_redaction(txn, event) elif event.type == EventTypes.RoomHistoryVisibility: self._store_history_visibility_txn(txn, event) + elif event.type == EventTypes.GuestAccess: + self._store_guest_access_txn(txn, event) self._store_room_members_txn( txn, diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 1c79626736..4f08df478c 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -99,34 +99,39 @@ class RoomStore(SQLBaseStore): """ def f(txn): - topic_subquery = ( - "SELECT topics.event_id as event_id, " - "topics.room_id as room_id, topic " - "FROM topics " - "INNER JOIN current_state_events as c " - "ON c.event_id = topics.event_id " - ) - - name_subquery = ( - "SELECT room_names.event_id as event_id, " - "room_names.room_id as room_id, name " - "FROM room_names " - "INNER JOIN current_state_events as c " - "ON c.event_id = room_names.event_id " - ) + def subquery(table_name, column_name=None): + column_name = column_name or table_name + return ( + "SELECT %(table_name)s.event_id as event_id, " + "%(table_name)s.room_id as room_id, %(column_name)s " + "FROM %(table_name)s " + "INNER JOIN current_state_events as c " + "ON c.event_id = %(table_name)s.event_id " % { + "column_name": column_name, + "table_name": table_name, + } + ) - # We use non printing ascii character US (\x1F) as a separator sql = ( - "SELECT r.room_id, max(n.name), max(t.topic)" + "SELECT" + " r.room_id," + " max(n.name)," + " max(t.topic)," + " max(v.history_visibility)," + " max(g.guest_access)" " FROM rooms AS r" " LEFT JOIN (%(topic)s) AS t ON t.room_id = r.room_id" " LEFT JOIN (%(name)s) AS n ON n.room_id = r.room_id" + " LEFT JOIN (%(history_visibility)s) AS v ON v.room_id = r.room_id" + " LEFT JOIN (%(guest_access)s) AS g ON g.room_id = r.room_id" " WHERE r.is_public = ?" - " GROUP BY r.room_id" - ) % { - "topic": topic_subquery, - "name": name_subquery, - } + " GROUP BY r.room_id" % { + "topic": subquery("topics", "topic"), + "name": subquery("room_names", "name"), + "history_visibility": subquery("history_visibility"), + "guest_access": subquery("guest_access"), + } + ) txn.execute(sql, (is_public,)) @@ -156,10 +161,12 @@ class RoomStore(SQLBaseStore): "room_id": r[0], "name": r[1], "topic": r[2], - "aliases": r[3], + "world_readable": r[3] == "world_readable", + "guest_can_join": r[4] == "can_join", + "aliases": r[5], } for r in rows - if r[3] # We only return rooms that have at least one alias. + if r[5] # We only return rooms that have at least one alias. 
] defer.returnValue(ret) @@ -203,16 +210,22 @@ class RoomStore(SQLBaseStore): ) def _store_history_visibility_txn(self, txn, event): - if hasattr(event, "content") and "history_visibility" in event.content: + self._store_content_index_txn(txn, event, "history_visibility") + + def _store_guest_access_txn(self, txn, event): + self._store_content_index_txn(txn, event, "guest_access") + + def _store_content_index_txn(self, txn, event, key): + if hasattr(event, "content") and key in event.content: sql = ( - "INSERT INTO history_visibility" - " (event_id, room_id, history_visibility)" - " VALUES (?, ?, ?)" + "INSERT INTO %(key)s" + " (event_id, room_id, %(key)s)" + " VALUES (?, ?, ?)" % {"key": key} ) txn.execute(sql, ( event.event_id, event.room_id, - event.content["history_visibility"] + event.content[key] )) def _store_event_search_txn(self, txn, event, key, value): diff --git a/synapse/storage/schema/delta/25/guest_access.sql b/synapse/storage/schema/delta/25/guest_access.sql new file mode 100644 index 0000000000..bdb90e7118 --- /dev/null +++ b/synapse/storage/schema/delta/25/guest_access.sql @@ -0,0 +1,25 @@ +/* Copyright 2015 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This is a manual index of guest_access content of state events, + * so that we can join on them in SELECT statements. 
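+ *
+ * Rows are written by _store_guest_access_txn in synapse/storage/room.py
+ * (via the shared _store_content_index_txn helper added in this commit).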
+ */ +CREATE TABLE IF NOT EXISTS guest_access( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + guest_access TEXT NOT NULL, + UNIQUE (event_id) +); diff --git a/tests/storage/test_room.py b/tests/storage/test_room.py index caffce64e3..91c967548d 100644 --- a/tests/storage/test_room.py +++ b/tests/storage/test_room.py @@ -73,6 +73,8 @@ class RoomStoreTestCase(unittest.TestCase): "room_id": self.room.to_string(), "topic": None, "aliases": [self.alias.to_string()], + "world_readable": False, + "guest_can_join": False, }, rooms[0]) -- cgit 1.4.1 From 940a16119205115c02a3b23e9f3c67a08486bae0 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 11 Nov 2015 13:59:40 +0000 Subject: Fix the background update --- synapse/storage/background_updates.py | 13 ++++++++----- synapse/storage/schema/delta/25/fts.py | 7 +++---- synapse/storage/search.py | 16 ++++++++-------- 3 files changed, 19 insertions(+), 17 deletions(-) (limited to 'synapse/storage/schema') diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index b6cdc6ec68..45fccc2e5e 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -93,16 +93,19 @@ class BackgroundUpdateStore(SQLBaseStore): sleep = defer.Deferred() self._background_update_timer = self._clock.call_later( - self.BACKGROUND_UPDATE_INTERVAL_MS / 1000., sleep.callback + self.BACKGROUND_UPDATE_INTERVAL_MS / 1000., sleep.callback, None ) try: yield sleep finally: self._background_update_timer = None - result = yield self.do_background_update( - self.BACKGROUND_UPDATE_DURATION_MS - ) + try: + result = yield self.do_background_update( + self.BACKGROUND_UPDATE_DURATION_MS + ) + except: + logger.exception("Error doing update") if result is None: logger.info( @@ -169,7 +172,7 @@ class BackgroundUpdateStore(SQLBaseStore): duration_ms = time_stop - time_start logger.info( - "Updating %. Updated %r items in %rms." + "Updating %r. Updated %r items in %rms." " (total_rate=%r/ms, current_rate=%r/ms, total_updated=%r)", update_name, items_updated, duration_ms, performance.total_items_per_ms(), diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py index e408d36da0..ce92f59025 100644 --- a/synapse/storage/schema/delta/25/fts.py +++ b/synapse/storage/schema/delta/25/fts.py @@ -47,11 +47,10 @@ def run_upgrade(cur, database_engine, *args, **kwargs): if isinstance(database_engine, PostgresEngine): for statement in get_statements(POSTGRES_TABLE.splitlines()): cur.execute(statement) - return - - if isinstance(database_engine, Sqlite3Engine): + elif isinstance(database_engine, Sqlite3Engine): cur.execute(SQLITE_TABLE) - return + else: + raise Exception("Unrecognized database engine") cur.execute("SELECT MIN(stream_ordering) FROM events") rows = cur.fetchall() diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 3b19b118eb..3c0d671129 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -46,18 +46,18 @@ class SearchStore(BackgroundUpdateStore): def reindex_search_txn(txn): sql = ( - "SELECT stream_id, event_id FROM events" + "SELECT stream_ordering, event_id FROM events" " WHERE ? <= stream_ordering AND stream_ordering < ?" " AND (%s)" " ORDER BY stream_ordering DESC" " LIMIT ?" 
- ) % (" OR ".join("type = '%s'" % TYPES),) + ) % (" OR ".join("type = '%s'" % (t,) for t in TYPES),) - txn.execute(sql, target_min_stream_id, max_stream_id, batch_size) + txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) - rows = txn.fetch_all() + rows = txn.fetchall() if not rows: - return None + return 0 min_stream_id = rows[-1][0] event_ids = [row[1] for row in rows] @@ -102,7 +102,7 @@ class SearchStore(BackgroundUpdateStore): for index in range(0, len(event_search_rows), INSERT_CLUMP_SIZE): clump = event_search_rows[index:index + INSERT_CLUMP_SIZE] - txn.execute_many(sql, clump) + txn.executemany(sql, clump) progress = { "target_min_stream_id_inclusive": target_min_stream_id, @@ -116,11 +116,11 @@ class SearchStore(BackgroundUpdateStore): return len(event_search_rows) - result = yield self.runInteration( + result = yield self.runInteraction( self.EVENT_SEARCH_UPDATE_NAME, reindex_search_txn ) - if result is None: + if not result: yield self._end_background_update(self.EVENT_SEARCH_UPDATE_NAME) defer.returnValue(result) -- cgit 1.4.1 From e1627388d15fac534e22b80c2f7cb1a4e1afacc3 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 11 Nov 2015 17:14:56 +0000 Subject: Fix param style to work on both sqlite and postgres --- synapse/storage/schema/delta/25/fts.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'synapse/storage/schema') diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py index ce92f59025..5239d69073 100644 --- a/synapse/storage/schema/delta/25/fts.py +++ b/synapse/storage/schema/delta/25/fts.py @@ -68,7 +68,11 @@ def run_upgrade(cur, database_engine, *args, **kwargs): } progress_json = ujson.dumps(progress) - cur.execute( + sql = ( "INSERT into background_updates (update_name, progress_json)" - " VALUES (?, ?)", ("event_search", progress_json) + " VALUES (?, ?)" ) + + sql = database_engine.convert_param_style(sql) + + cur.execute(sql, ("event_search", progress_json)) -- cgit 1.4.1
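
Taken together, these patches fix the background-update machinery end to end: the store loops over queued updates, hands each handler its stored JSON progress plus a tuned batch size, and the handler reports how many items it processed. The sketch below illustrates that contract for a hypothetical update; MyStore, the "my_update" name, and the elided batch body are illustrative assumptions rather than code from any patch above, while the register/progress/end calls are the API added in background_updates.py.

# Usage sketch only: "my_update", MyStore and the batch body are invented;
# the method names come straight from background_updates.py above.
from twisted.internet import defer

from synapse.storage.background_updates import BackgroundUpdateStore


class MyStore(BackgroundUpdateStore):
    MY_UPDATE_NAME = "my_update"  # hypothetical update name

    def __init__(self, hs):
        super(MyStore, self).__init__(hs)
        # Map the persisted update name to the coroutine that does the work.
        self.register_background_update_handler(
            self.MY_UPDATE_NAME, self._do_my_update
        )

    @defer.inlineCallbacks
    def _do_my_update(self, progress, batch_size):
        # progress is the JSON blob stored for this update, either by the
        # schema delta that queued it or by our previous batch.
        rows_done = progress.get("rows_done", 0)

        def do_batch_txn(txn):
            # ... process at most batch_size rows after rows_done and set
            # `processed` to the number actually handled ...
            processed = 0
            self._background_update_progress_txn(
                txn, self.MY_UPDATE_NAME, {"rows_done": rows_done + processed}
            )
            return processed

        processed = yield self.runInteraction(self.MY_UPDATE_NAME, do_batch_txn)

        if not processed:
            # Nothing left: delete our row from the background_updates table.
            yield self._end_background_update(self.MY_UPDATE_NAME)

        defer.returnValue(processed)

A schema delta queues such an update the same way delta/25/fts.py does above: insert an (update_name, progress_json) row into background_updates, passing the SQL through database_engine.convert_param_style() so the parameter placeholders work on both SQLite and Postgres.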