From 8cbbfaefc1fc597cbbef52a10dbfb8ecd4d8a8cd Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 23 Mar 2018 10:32:50 +0000 Subject: 404 correctly on missing paths via NoResource fixes https://github.com/matrix-org/synapse/issues/2043 and https://github.com/matrix-org/synapse/issues/2029 --- synapse/app/homeserver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/app/homeserver.py') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index e477c7ced6..c00afbba28 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -56,7 +56,7 @@ from synapse.util.rlimit import change_resource_limit from synapse.util.versionstring import get_version_string from twisted.application import service from twisted.internet import defer, reactor -from twisted.web.resource import EncodingResourceWrapper, Resource +from twisted.web.resource import EncodingResourceWrapper, NoResource from twisted.web.server import GzipEncoderFactory from twisted.web.static import File @@ -126,7 +126,7 @@ class SynapseHomeServer(HomeServer): if WEB_CLIENT_PREFIX in resources: root_resource = RootRedirect(WEB_CLIENT_PREFIX) else: - root_resource = Resource() + root_resource = NoResource() root_resource = create_resource_tree(resources, root_resource) -- cgit 1.4.1 From ef520d8d0e152a24fb6660fdd2def214b6e9caae Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Tue, 27 Mar 2018 14:12:22 +0100 Subject: Include coarse CPU and Memory use in stats callbacks. This requires the psutil module, and is still opt-in based on the report_stats config option. --- UPGRADE.rst | 12 ++++++++++++ synapse/app/homeserver.py | 20 ++++++++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'synapse/app/homeserver.py') diff --git a/UPGRADE.rst b/UPGRADE.rst index 2efe7ea60f..f6bb1070b1 100644 --- a/UPGRADE.rst +++ b/UPGRADE.rst @@ -48,6 +48,18 @@ returned by the Client-Server API: # configured on port 443. curl -kv https:///_matrix/client/versions 2>&1 | grep "Server:" +Upgrading to $NEXT_VERSION +==================== + +This release expands the anonymous usage stats sent if the opt-in +``report_stats`` configuration is set to ``true``. We now capture RSS memory +and cpu use at a very coarse level. This requires administrators to install +the optional ``psutil`` python module. + +We would appreciate it if you could assist by ensuring this module is available +and ``report_stats`` is enabled. This will let us see if performance changes to +synapse are having an impact to the general community. + Upgrading to v0.15.0 ==================== diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index c00afbba28..313be42ded 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -401,6 +401,7 @@ def run(hs): start_time = clock.time() stats = {} + stats_process = None @defer.inlineCallbacks def phone_stats_home(): @@ -427,6 +428,10 @@ def run(hs): daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages + if stats_process is not None: + with stats_process.oneshot(): + stats["memory_rss"] = stats_process.memory_info().rss + stats["cpu_average"] = int(stats_process.cpu_info(interval=None)) logger.info("Reporting stats to matrix.org: %s" % (stats,)) try: @@ -438,6 +443,21 @@ def run(hs): logger.warn("Error reporting stats: %s", e) if hs.config.report_stats: + try: + import psutil + stats_process = psutil.Process() + # Ensure we can fetch both, and make the initial request for cpu_percent + # so the next request will use this as the initial point. + stats_process.memory_info().rss + stats_process.cpu_percent(interval=None) + except (ImportError, AttributeError): + logger.warn( + "report_stats enabled but psutil is not installed or incorrect version." + " Disabling reporting of memory/cpu stats." + " Ensuring psutil is available will help matrix track performance changes across releases." + ) + stats_process = None + logger.info("Scheduling stats reporting for 3 hour intervals") clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000) -- cgit 1.4.1 From a32d2548d986f7075e8310184ce0b70c69513a02 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 10:39:13 +0100 Subject: query and call for r30 stats --- synapse/app/homeserver.py | 2 ++ synapse/storage/__init__.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'synapse/app/homeserver.py') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index c00afbba28..8bce9f1ace 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -425,6 +425,8 @@ def run(hs): stats["daily_active_rooms"] = yield hs.get_datastore().count_daily_active_rooms() stats["daily_messages"] = yield hs.get_datastore().count_daily_messages() + stats["r30_users"] = yield hs.get_datastore().count_r30_users() + daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index b97e5e5ff4..10f99c3cd5 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -267,6 +267,42 @@ class DataStore(RoomMemberStore, RoomStore, ret = yield self.runInteraction("count_users", _count_users) defer.returnValue(ret) + @defer.inlineCallbacks + def count_r30_users(self): + """ + Counts the number of 30 day retained users, defined as:- + * Users who have created their accounts more than 30 days + * Where last seen at most 30 days ago + * Where account creation and last_seen are > 30 days + """ + def _count_r30_users(txn): + thirty_days_in_secs = 86400 * 30 + now = int(self._clock.time_msec()) + thirty_days_ago_in_secs = now - thirty_days_in_secs + + sql = """ + SELECT COALESCE(count(*), 0) FROM ( + SELECT users.name, users.creation_ts * 1000, MAX(user_ips.last_seen) + FROM users, user_ips + WHERE users.name = user_ips.user_id + AND appservice_id is NULL + AND users.creation_ts < ? + AND user_ips.last_seen/1000 > ? + AND (user_ips.last_seen/1000) - users.creation_ts > ? + GROUP BY users.name, users.creation_ts + ) u + """ + + txn.execute(sql, (thirty_days_ago_in_secs, + thirty_days_ago_in_secs, + thirty_days_in_secs)) + + count, = txn.fetchone() + return count + + ret = yield self.runInteraction("count_r30_users", _count_r30_users) + defer.returnValue(ret) + def get_users(self): """Function to reterive a list of users in users table. -- cgit 1.4.1 From 4ceaa7433a324afab23c4a445cabe3da965e5846 Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Wed, 28 Mar 2018 12:08:09 +0100 Subject: As daemonizing will make a new process, defer call to init. --- synapse/app/homeserver.py | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) (limited to 'synapse/app/homeserver.py') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 313be42ded..0737945ede 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -401,7 +401,7 @@ def run(hs): start_time = clock.time() stats = {} - stats_process = None + stats_process = [] @defer.inlineCallbacks def phone_stats_home(): @@ -428,10 +428,13 @@ def run(hs): daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages - if stats_process is not None: - with stats_process.oneshot(): - stats["memory_rss"] = stats_process.memory_info().rss - stats["cpu_average"] = int(stats_process.cpu_info(interval=None)) + if len(stats_process) > 0: + stats["memory_rss"] = 0 + stats["cpu_average"] = 0 + for process in stats_process: + with process.oneshot(): + stats["memory_rss"] += process.memory_info().rss + stats["cpu_average"] += int(process.cpu_percent(interval=None)) logger.info("Reporting stats to matrix.org: %s" % (stats,)) try: @@ -442,25 +445,32 @@ def run(hs): except Exception as e: logger.warn("Error reporting stats: %s", e) - if hs.config.report_stats: + def performance_stats_init(): try: import psutil - stats_process = psutil.Process() + process = psutil.Process() # Ensure we can fetch both, and make the initial request for cpu_percent # so the next request will use this as the initial point. - stats_process.memory_info().rss - stats_process.cpu_percent(interval=None) + process.memory_info().rss + process.cpu_percent(interval=None) + logger.info("report_stats can use psutil") + stats_process.append(process) except (ImportError, AttributeError): logger.warn( - "report_stats enabled but psutil is not installed or incorrect version." - " Disabling reporting of memory/cpu stats." - " Ensuring psutil is available will help matrix track performance changes across releases." + "report_stats enabled but psutil is not installed or incorrect version." + " Disabling reporting of memory/cpu stats." + " Ensuring psutil is available will help matrix track performance changes" + " across releases." ) - stats_process = None + if hs.config.report_stats: logger.info("Scheduling stats reporting for 3 hour intervals") clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000) + # We need to defer this init for the cases that we daemonize + # otherwise the process ID we get is that of the non-daemon process + clock.call_later(15, performance_stats_init) + # We wait 5 minutes to send the first set of stats as the server can # be quite busy the first few minutes clock.call_later(5 * 60, phone_stats_home) -- cgit 1.4.1 From 792d340572026becf48fe73421f0b73cf575fe46 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 12:25:02 +0100 Subject: rename stat to future proof --- synapse/app/homeserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/app/homeserver.py') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 8bce9f1ace..286f4dcf7b 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -425,7 +425,7 @@ def run(hs): stats["daily_active_rooms"] = yield hs.get_datastore().count_daily_active_rooms() stats["daily_messages"] = yield hs.get_datastore().count_daily_messages() - stats["r30_users"] = yield hs.get_datastore().count_r30_users() + stats["r30_users_all"] = yield hs.get_datastore().count_r30_users() daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages -- cgit 1.4.1 From 33f6195d9ae91520aee9d108d60245b5265ac714 Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Wed, 28 Mar 2018 14:25:25 +0100 Subject: Handle review comments --- synapse/app/homeserver.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'synapse/app/homeserver.py') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 0737945ede..b935beb974 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -401,6 +401,9 @@ def run(hs): start_time = clock.time() stats = {} + + # Contains the list of processes we will be monitoring + # currently either 0 or 1 stats_process = [] @defer.inlineCallbacks @@ -428,13 +431,13 @@ def run(hs): daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages + if len(stats_process) > 0: stats["memory_rss"] = 0 stats["cpu_average"] = 0 for process in stats_process: - with process.oneshot(): - stats["memory_rss"] += process.memory_info().rss - stats["cpu_average"] += int(process.cpu_percent(interval=None)) + stats["memory_rss"] += process.memory_info().rss + stats["cpu_average"] += int(process.cpu_percent(interval=None)) logger.info("Reporting stats to matrix.org: %s" % (stats,)) try: @@ -459,8 +462,8 @@ def run(hs): logger.warn( "report_stats enabled but psutil is not installed or incorrect version." " Disabling reporting of memory/cpu stats." - " Ensuring psutil is available will help matrix track performance changes" - " across releases." + " Ensuring psutil is available will help matrix.org track performance" + " changes across releases." ) if hs.config.report_stats: @@ -469,7 +472,7 @@ def run(hs): # We need to defer this init for the cases that we daemonize # otherwise the process ID we get is that of the non-daemon process - clock.call_later(15, performance_stats_init) + clock.call_later(0, performance_stats_init) # We wait 5 minutes to send the first set of stats as the server can # be quite busy the first few minutes -- cgit 1.4.1 From 86932be2cb1837688d154ff78fb6418f78483133 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 14:36:53 +0100 Subject: Support multi client R30 for psql --- synapse/app/homeserver.py | 4 +++- synapse/storage/__init__.py | 36 ++++++++++++++++++++++++++++-------- 2 files changed, 31 insertions(+), 9 deletions(-) (limited to 'synapse/app/homeserver.py') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 286f4dcf7b..35e2b00f1b 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -425,7 +425,9 @@ def run(hs): stats["daily_active_rooms"] = yield hs.get_datastore().count_daily_active_rooms() stats["daily_messages"] = yield hs.get_datastore().count_daily_messages() - stats["r30_users_all"] = yield hs.get_datastore().count_r30_users() + r30_results = yield hs.get_datastore().count_r30_users() + for name, count in r30_results.items(): + stats["r30_users_" + name] = count daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index ba43b2d8ec..b651973c79 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -280,6 +280,15 @@ class DataStore(RoomMemberStore, RoomStore, now = int(self._clock.time_msec()) thirty_days_ago_in_secs = now - thirty_days_in_secs + # Are these filters sufficiently robust? + filters = { + "ALL": "", + "IOS": "^(Vector|Riot|Riot\.im)\/.* iOS", + "ANDROID": "^(Dalvik|Riot|Riot\.im)\/.* Android", + "ELECTRON": "Electron", + "WEB": "(Gecko|Mozilla)", + } + sql = """ SELECT COALESCE(count(*), 0) FROM ( SELECT users.name, users.creation_ts * 1000, MAX(user_ips.last_seen) @@ -289,16 +298,27 @@ class DataStore(RoomMemberStore, RoomStore, AND users.creation_ts < ? AND user_ips.last_seen/1000 > ? AND (user_ips.last_seen/1000) - users.creation_ts > ? - GROUP BY users.name, users.creation_ts - ) u """ - txn.execute(sql, (thirty_days_ago_in_secs, - thirty_days_ago_in_secs, - thirty_days_in_secs)) - - count, = txn.fetchone() - return count + if isinstance(self.database_engine, PostgresEngine): + sql = sql + "AND user_ips.user_agent ~ ? " + sql = sql + "GROUP BY users.name, users.creation_ts ) u" + + results = {} + if isinstance(self.database_engine, PostgresEngine): + for filter_name, user_agent_filter in filters.items(): + txn.execute(sql, (thirty_days_ago_in_secs, + thirty_days_ago_in_secs, + thirty_days_in_secs, + user_agent_filter)) + results[filter_name], = txn.fetchone() + + else: + txn.execute(sql, (thirty_days_ago_in_secs, + thirty_days_ago_in_secs, + thirty_days_in_secs)) + results["ALL"], = txn.fetchone() + return results ret = yield self.runInteraction("count_r30_users", _count_r30_users) defer.returnValue(ret) -- cgit 1.4.1 From e4570c53dd35f00103e2353884d1dd446fc4c0f4 Mon Sep 17 00:00:00 2001 From: Jan Christian Grünhage Date: Wed, 4 Apr 2018 16:46:58 +0100 Subject: phone home cache size configurations --- synapse/app/homeserver.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'synapse/app/homeserver.py') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index b935beb974..464799ac90 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -48,6 +48,7 @@ from synapse.server import HomeServer from synapse.storage import are_all_users_on_domain from synapse.storage.engines import IncorrectDatabaseSetup, create_engine from synapse.storage.prepare_database import UpgradeDatabaseException, prepare_database +from synapse.util.caches import CACHE_SIZE_FACTOR from synapse.util.httpresourcetree import create_resource_tree from synapse.util.logcontext import LoggingContext from synapse.util.manhole import manhole @@ -431,6 +432,8 @@ def run(hs): daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages + stats["cache_factor"] = CACHE_SIZE_FACTOR + stats["event_cache_size"] = hs.config.event_cache_size if len(stats_process) > 0: stats["memory_rss"] = 0 -- cgit 1.4.1 From 0e5f479fc05ef9257c1bfce033c8fb91e6244ffe Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 5 Apr 2018 12:16:46 +0100 Subject: Review comments Use iteritems over item to loop over dict formatting --- synapse/app/homeserver.py | 2 +- synapse/storage/__init__.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'synapse/app/homeserver.py') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 35e2b00f1b..777e9c529a 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -426,7 +426,7 @@ def run(hs): stats["daily_messages"] = yield hs.get_datastore().count_daily_messages() r30_results = yield hs.get_datastore().count_r30_users() - for name, count in r30_results.items(): + for name, count in r30_results.iteritems(): stats["r30_users_" + name] = count daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index f68e436df0..4800584b59 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -280,8 +280,9 @@ class DataStore(RoomMemberStore, RoomStore, sql = """ SELECT platform, COALESCE(count(*), 0) FROM ( - SELECT users.name, platform, users.creation_ts * 1000, - MAX(uip.last_seen) + SELECT + users.name, platform, users.creation_ts * 1000, + MAX(uip.last_seen) FROM users INNER JOIN ( SELECT @@ -310,8 +311,8 @@ class DataStore(RoomMemberStore, RoomStore, results = {} txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs)) - rows = txn.fetchall() - for row in rows: + + for row in txn: if row[0] is 'unknown': pass results[row[0]] = row[1] -- cgit 1.4.1 From 617bf40924bc8d627734eb2fb25a9f3980ede5d5 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 25 Apr 2018 17:37:29 +0100 Subject: Generate user daily stats --- synapse/app/homeserver.py | 6 +++ synapse/storage/__init__.py | 60 ++++++++++++++++++++-- synapse/storage/client_ips.py | 7 +++ synapse/storage/prepare_database.py | 2 +- .../schema/delta/49/add_user_daily_visits.sql | 25 +++++++++ .../delta/49/add_user_ips_last_seen_only_index.sql | 17 ++++++ 6 files changed, 112 insertions(+), 5 deletions(-) create mode 100644 synapse/storage/schema/delta/49/add_user_daily_visits.sql create mode 100644 synapse/storage/schema/delta/49/add_user_ips_last_seen_only_index.sql (limited to 'synapse/app/homeserver.py') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index a0e465d644..808567365a 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -473,6 +473,9 @@ def run(hs): " changes across releases." ) + def generate_user_daily_visit_stats(): + hs.get_datastore().generate_user_daily_visits() + if hs.config.report_stats: logger.info("Scheduling stats reporting for 3 hour intervals") clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000) @@ -485,6 +488,9 @@ def run(hs): # be quite busy the first few minutes clock.call_later(5 * 60, phone_stats_home) + clock.looping_call(generate_user_daily_visit_stats, 60 * 1000) + clock.call_later(5 * 60, generate_user_daily_visit_stats) + if hs.config.daemonize and hs.config.print_pidfile: print (hs.config.pid_file) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 8cdfd50f90..6f2f947433 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -14,6 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime +import time +import logging + from synapse.storage.devices import DeviceStore from .appservice import ( ApplicationServiceStore, ApplicationServiceTransactionStore @@ -55,10 +59,6 @@ from .engines import PostgresEngine from synapse.api.constants import PresenceState from synapse.util.caches.stream_change_cache import StreamChangeCache - -import logging - - logger = logging.getLogger(__name__) @@ -347,6 +347,58 @@ class DataStore(RoomMemberStore, RoomStore, return self.runInteraction("count_r30_users", _count_r30_users) + + def generate_user_daily_visits(self): + """ + Generates daily visit data for use in cohort/ retention analysis + """ + def _generate_user_daily_visits(txn): + logger.info("Calling _generate_user_daily_visits") + # determine timestamp of previous days + yesterday = datetime.datetime.now() - datetime.timedelta(days=1) + yesterday_start = datetime.datetime(yesterday.year, + yesterday.month, + yesterday.day, 0, 0, 0, 0) + yesterday_start_time = int(time.mktime(yesterday_start.timetuple())) * 1000 + + # Check that this job has not already been completed + sql = """ + SELECT timestamp + FROM user_daily_visits + ORDER by timestamp desc limit 1 + """ + txn.execute(sql) + row = txn.fetchone() + + # Bail if the most recent time is yesterday + if row and row[0] == yesterday_start_time: + logger.info("Bailing from _generate_user_daily_visits, already completed") + return + logger.info("inserting into user_daily_visits") + # Not specificying an upper bound means that if the update is run at + # 10 mins past midnight and the user is active during a 30 min session + # that the user is still included in the previous days stats + # This does mean that if the update is run hours late, then it is possible + # to overstate the cohort, but this seems a reasonable trade off + # The alternative is to insert on every request - but prefer to avoid + # for performance reasons + sql = """ + SELECT user_id, user_agent, device_id + FROM user_ips + WHERE last_seen > ? + """ + txn.execute(sql, (yesterday_start_time,)) + + sql = """ + INSERT INTO user_daily_visits (user_id, user_agent, device_id, timestamp) + VALUES (?, ?, ?, ?) + """ + + for row in txn: + txn.execute(sql, (row + (yesterday_start_time,))) + + return self.runInteraction("generate_user_daily_visits", _generate_user_daily_visits) + def get_users(self): """Function to reterive a list of users in users table. diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 7b44dae0fc..ba46907737 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -55,6 +55,13 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): columns=["user_id", "last_seen"], ) + self.register_background_index_update( + "user_ips_last_seen_only_index", + index_name="user_ips_last_seen_only", + table="user_ips", + columns=["last_seen"], + ) + # (user_id, access_token, ip) -> (user_agent, device_id, last_seen) self._batch_row_update = {} diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 04411a665f..c08e9cd65a 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -26,7 +26,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 48 +SCHEMA_VERSION = 49 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/49/add_user_daily_visits.sql b/synapse/storage/schema/delta/49/add_user_daily_visits.sql new file mode 100644 index 0000000000..11356a97fc --- /dev/null +++ b/synapse/storage/schema/delta/49/add_user_daily_visits.sql @@ -0,0 +1,25 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +CREATE TABLE user_daily_visits ( user_id TEXT NOT NULL, + device_id TEXT, + user_agent TEXT NOT NULL, + timestamp BIGINT NOT NULL ); + +/* What indexes should I include? + * Reads are offline so should optimise for writes + * Need to check if already an entry so user,day + */ diff --git a/synapse/storage/schema/delta/49/add_user_ips_last_seen_only_index.sql b/synapse/storage/schema/delta/49/add_user_ips_last_seen_only_index.sql new file mode 100644 index 0000000000..3a4ed59b5b --- /dev/null +++ b/synapse/storage/schema/delta/49/add_user_ips_last_seen_only_index.sql @@ -0,0 +1,17 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT into background_updates (update_name, progress_json) + VALUES ('user_ips_last_seen_only_index', '{}'); -- cgit 1.4.1 From 5917562b6040448cd26b67fac52d61b41ba08f6d Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Tue, 1 May 2018 12:12:22 +0100 Subject: 10 mins seems more reasonable that every minute --- synapse/app/homeserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/app/homeserver.py') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 808567365a..328ab8032c 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -488,7 +488,7 @@ def run(hs): # be quite busy the first few minutes clock.call_later(5 * 60, phone_stats_home) - clock.looping_call(generate_user_daily_visit_stats, 60 * 1000) + clock.looping_call(generate_user_daily_visit_stats, 10 * 60 * 1000) clock.call_later(5 * 60, generate_user_daily_visit_stats) if hs.config.daemonize and hs.config.print_pidfile: -- cgit 1.4.1 From 318711e1399da009910c3a9e5fa297c28a2d0a97 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 10 May 2018 18:46:59 +0100 Subject: Set Server header in SynapseRequest (instead of everywhere that writes a response. Or rather, the subset of places which write responses where we haven't forgotten it). This also means that we don't have to have the mysterious version_string attribute in anything with a request handler. Unfortunately it does mean that we have to pass the version string wherever we instantiate a SynapseSite, which has been c&ped 150 times, but that is code that ought to be cleaned up anyway really. --- synapse/app/appservice.py | 1 + synapse/app/client_reader.py | 1 + synapse/app/event_creator.py | 1 + synapse/app/federation_reader.py | 1 + synapse/app/federation_sender.py | 1 + synapse/app/frontend_proxy.py | 1 + synapse/app/homeserver.py | 2 ++ synapse/app/media_repository.py | 1 + synapse/app/pusher.py | 1 + synapse/app/synchrotron.py | 1 + synapse/app/user_dir.py | 1 + synapse/http/additional_resource.py | 3 +-- synapse/http/server.py | 14 ++++---------- synapse/http/site.py | 11 ++++++++++- synapse/rest/client/v1/pusher.py | 1 - synapse/rest/client/v2_alpha/auth.py | 2 -- synapse/rest/key/v1/server_key_resource.py | 2 -- synapse/rest/key/v2/local_key_resource.py | 2 -- synapse/rest/key/v2/remote_key_resource.py | 2 -- synapse/rest/media/v1/download_resource.py | 3 +-- synapse/rest/media/v1/preview_url_resource.py | 1 - synapse/rest/media/v1/thumbnail_resource.py | 1 - synapse/rest/media/v1/upload_resource.py | 1 - 23 files changed, 28 insertions(+), 27 deletions(-) (limited to 'synapse/app/homeserver.py') diff --git a/synapse/app/appservice.py b/synapse/app/appservice.py index 58f2c9d68c..b1efacc9f8 100644 --- a/synapse/app/appservice.py +++ b/synapse/app/appservice.py @@ -74,6 +74,7 @@ class AppserviceServer(HomeServer): site_tag, listener_config, root_resource, + self.version_string, ) ) diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index 267d34c881..38b98382c6 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -98,6 +98,7 @@ class ClientReaderServer(HomeServer): site_tag, listener_config, root_resource, + self.version_string, ) ) diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py index b915d12d53..bd7f3d5679 100644 --- a/synapse/app/event_creator.py +++ b/synapse/app/event_creator.py @@ -114,6 +114,7 @@ class EventCreatorServer(HomeServer): site_tag, listener_config, root_resource, + self.version_string, ) ) diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index c1dc66dd17..6e10b27b9e 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -87,6 +87,7 @@ class FederationReaderServer(HomeServer): site_tag, listener_config, root_resource, + self.version_string, ) ) diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index a08af83a4c..6f24e32d6d 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -101,6 +101,7 @@ class FederationSenderServer(HomeServer): site_tag, listener_config, root_resource, + self.version_string, ) ) diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py index b349e3e3ce..0f700ee786 100644 --- a/synapse/app/frontend_proxy.py +++ b/synapse/app/frontend_proxy.py @@ -152,6 +152,7 @@ class FrontendProxyServer(HomeServer): site_tag, listener_config, root_resource, + self.version_string, ) ) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index a0e465d644..75f40fd5a4 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -140,6 +140,7 @@ class SynapseHomeServer(HomeServer): site_tag, listener_config, root_resource, + self.version_string, ), self.tls_server_context_factory, ) @@ -153,6 +154,7 @@ class SynapseHomeServer(HomeServer): site_tag, listener_config, root_resource, + self.version_string, ) ) logger.info("Synapse now listening on port %d", port) diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py index fc8282bbc1..9c93195f0a 100644 --- a/synapse/app/media_repository.py +++ b/synapse/app/media_repository.py @@ -94,6 +94,7 @@ class MediaRepositoryServer(HomeServer): site_tag, listener_config, root_resource, + self.version_string, ) ) diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index 26930d1b3b..3912eae48c 100644 --- a/synapse/app/pusher.py +++ b/synapse/app/pusher.py @@ -104,6 +104,7 @@ class PusherServer(HomeServer): site_tag, listener_config, root_resource, + self.version_string, ) ) diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index 7152b1deb4..c6294a7a0c 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -281,6 +281,7 @@ class SynchrotronServer(HomeServer): site_tag, listener_config, root_resource, + self.version_string, ) ) diff --git a/synapse/app/user_dir.py b/synapse/app/user_dir.py index 5ba7e9b416..53eb3474da 100644 --- a/synapse/app/user_dir.py +++ b/synapse/app/user_dir.py @@ -126,6 +126,7 @@ class UserDirectoryServer(HomeServer): site_tag, listener_config, root_resource, + self.version_string, ) ) diff --git a/synapse/http/additional_resource.py b/synapse/http/additional_resource.py index d9e7f5dfb7..a797396ade 100644 --- a/synapse/http/additional_resource.py +++ b/synapse/http/additional_resource.py @@ -42,8 +42,7 @@ class AdditionalResource(Resource): Resource.__init__(self) self._handler = handler - # these are required by the request_handler wrapper - self.version_string = hs.version_string + # required by the request_handler wrapper self.clock = hs.get_clock() def render(self, request): diff --git a/synapse/http/server.py b/synapse/http/server.py index f29e36f490..b6e2ae14a2 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -51,8 +51,8 @@ def wrap_json_request_handler(h): Also adds logging as per wrap_request_handler_with_logging. The handler method must have a signature of "handle_foo(self, request)", - where "self" must have "version_string" and "clock" attributes (and - "request" must be a SynapseRequest). + where "self" must have a "clock" attribute (and "request" must be a + SynapseRequest). The handler must return a deferred. If the deferred succeeds we assume that a response has been sent. If the deferred fails with a SynapseError we use @@ -75,7 +75,6 @@ def wrap_json_request_handler(h): respond_with_json( request, code, cs_exception(e), send_cors=True, pretty_print=_request_user_agent_is_curl(request), - version_string=self.version_string, ) except Exception: @@ -98,7 +97,6 @@ def wrap_json_request_handler(h): }, send_cors=True, pretty_print=_request_user_agent_is_curl(request), - version_string=self.version_string, ) return wrap_request_handler_with_logging(wrapped_request_handler) @@ -192,7 +190,6 @@ class JsonResource(HttpServer, resource.Resource): self.canonical_json = canonical_json self.clock = hs.get_clock() self.path_regexs = {} - self.version_string = hs.version_string self.hs = hs def register_paths(self, method, path_patterns, callback): @@ -275,7 +272,6 @@ class JsonResource(HttpServer, resource.Resource): send_cors=True, response_code_message=response_code_message, pretty_print=_request_user_agent_is_curl(request), - version_string=self.version_string, canonical_json=self.canonical_json, ) @@ -326,7 +322,7 @@ class RootRedirect(resource.Resource): def respond_with_json(request, code, json_object, send_cors=False, response_code_message=None, pretty_print=False, - version_string="", canonical_json=True): + canonical_json=True): # could alternatively use request.notifyFinish() and flip a flag when # the Deferred fires, but since the flag is RIGHT THERE it seems like # a waste. @@ -348,12 +344,11 @@ def respond_with_json(request, code, json_object, send_cors=False, request, code, json_bytes, send_cors=send_cors, response_code_message=response_code_message, - version_string=version_string ) def respond_with_json_bytes(request, code, json_bytes, send_cors=False, - version_string="", response_code_message=None): + response_code_message=None): """Sends encoded JSON in response to the given request. Args: @@ -367,7 +362,6 @@ def respond_with_json_bytes(request, code, json_bytes, send_cors=False, request.setResponseCode(code, message=response_code_message) request.setHeader(b"Content-Type", b"application/json") - request.setHeader(b"Server", version_string) request.setHeader(b"Content-Length", b"%d" % (len(json_bytes),)) request.setHeader(b"Cache-Control", b"no-cache, no-store, must-revalidate") diff --git a/synapse/http/site.py b/synapse/http/site.py index bfd9832aa0..202a990508 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -77,6 +77,11 @@ class SynapseRequest(Request): def get_user_agent(self): return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1] + def render(self, resrc): + # override the Server header which is set by twisted + self.setHeader("Server", self.site.server_version_string) + return Request.render(self, resrc) + def _started_processing(self, servlet_name): self.start_time = int(time.time() * 1000) self.request_metrics = RequestMetrics() @@ -151,6 +156,8 @@ class SynapseRequest(Request): It is possible to update this afterwards by updating self.request_metrics.servlet_name. """ + # TODO: we should probably just move this into render() and finish(), + # to save having to call a separate method. self._started_processing(servlet_name) yield self._finished_processing() @@ -191,7 +198,8 @@ class SynapseSite(Site): Subclass of a twisted http Site that does access logging with python's standard logging """ - def __init__(self, logger_name, site_tag, config, resource, *args, **kwargs): + def __init__(self, logger_name, site_tag, config, resource, + server_version_string, *args, **kwargs): Site.__init__(self, resource, *args, **kwargs) self.site_tag = site_tag @@ -199,6 +207,7 @@ class SynapseSite(Site): proxied = config.get("x_forwarded", False) self.requestFactory = SynapseRequestFactory(self, proxied) self.access_logger = logging.getLogger(logger_name) + self.server_version_string = server_version_string def log(self, request): pass diff --git a/synapse/rest/client/v1/pusher.py b/synapse/rest/client/v1/pusher.py index 0206e664c1..40e523cc5f 100644 --- a/synapse/rest/client/v1/pusher.py +++ b/synapse/rest/client/v1/pusher.py @@ -176,7 +176,6 @@ class PushersRemoveRestServlet(RestServlet): request.setResponseCode(200) request.setHeader(b"Content-Type", b"text/html; charset=utf-8") - request.setHeader(b"Server", self.hs.version_string) request.setHeader(b"Content-Length", b"%d" % ( len(PushersRemoveRestServlet.SUCCESS_HTML), )) diff --git a/synapse/rest/client/v2_alpha/auth.py b/synapse/rest/client/v2_alpha/auth.py index 8e5577148f..d6f3a19648 100644 --- a/synapse/rest/client/v2_alpha/auth.py +++ b/synapse/rest/client/v2_alpha/auth.py @@ -129,7 +129,6 @@ class AuthRestServlet(RestServlet): html_bytes = html.encode("utf8") request.setResponseCode(200) request.setHeader(b"Content-Type", b"text/html; charset=utf-8") - request.setHeader(b"Server", self.hs.version_string) request.setHeader(b"Content-Length", b"%d" % (len(html_bytes),)) request.write(html_bytes) @@ -175,7 +174,6 @@ class AuthRestServlet(RestServlet): html_bytes = html.encode("utf8") request.setResponseCode(200) request.setHeader(b"Content-Type", b"text/html; charset=utf-8") - request.setHeader(b"Server", self.hs.version_string) request.setHeader(b"Content-Length", b"%d" % (len(html_bytes),)) request.write(html_bytes) diff --git a/synapse/rest/key/v1/server_key_resource.py b/synapse/rest/key/v1/server_key_resource.py index bd4fea5774..1498d188c1 100644 --- a/synapse/rest/key/v1/server_key_resource.py +++ b/synapse/rest/key/v1/server_key_resource.py @@ -49,7 +49,6 @@ class LocalKey(Resource): """ def __init__(self, hs): - self.version_string = hs.version_string self.response_body = encode_canonical_json( self.response_json_object(hs.config) ) @@ -84,7 +83,6 @@ class LocalKey(Resource): def render_GET(self, request): return respond_with_json_bytes( request, 200, self.response_body, - version_string=self.version_string ) def getChild(self, name, request): diff --git a/synapse/rest/key/v2/local_key_resource.py b/synapse/rest/key/v2/local_key_resource.py index be68d9a096..04775b3c45 100644 --- a/synapse/rest/key/v2/local_key_resource.py +++ b/synapse/rest/key/v2/local_key_resource.py @@ -63,7 +63,6 @@ class LocalKey(Resource): isLeaf = True def __init__(self, hs): - self.version_string = hs.version_string self.config = hs.config self.clock = hs.clock self.update_response_body(self.clock.time_msec()) @@ -115,5 +114,4 @@ class LocalKey(Resource): self.update_response_body(time_now) return respond_with_json_bytes( request, 200, self.response_body, - version_string=self.version_string ) diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py index 17b3077926..21b4c1175e 100644 --- a/synapse/rest/key/v2/remote_key_resource.py +++ b/synapse/rest/key/v2/remote_key_resource.py @@ -93,7 +93,6 @@ class RemoteKey(Resource): def __init__(self, hs): self.keyring = hs.get_keyring() self.store = hs.get_datastore() - self.version_string = hs.version_string self.clock = hs.get_clock() self.federation_domain_whitelist = hs.config.federation_domain_whitelist @@ -242,5 +241,4 @@ class RemoteKey(Resource): respond_with_json_bytes( request, 200, result_io.getvalue(), - version_string=self.version_string ) diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 3fc3f64d62..8cf8820c31 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -37,9 +37,8 @@ class DownloadResource(Resource): self.media_repo = media_repo self.server_name = hs.hostname - # Both of these are expected by @request_handler() + # this is expected by @wrap_json_request_handler self.clock = hs.get_clock() - self.version_string = hs.version_string def render_GET(self, request): self._async_render_GET(request) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 6b089689b4..2839207abc 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -58,7 +58,6 @@ class PreviewUrlResource(Resource): self.auth = hs.get_auth() self.clock = hs.get_clock() - self.version_string = hs.version_string self.filepaths = media_repo.filepaths self.max_spider_size = hs.config.max_spider_size self.server_name = hs.hostname diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 6c12d79f56..aae6e464e8 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -44,7 +44,6 @@ class ThumbnailResource(Resource): self.media_storage = media_storage self.dynamic_thumbnails = hs.config.dynamic_thumbnails self.server_name = hs.hostname - self.version_string = hs.version_string self.clock = hs.get_clock() def render_GET(self, request): diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index 7d01c57fd1..7567476fce 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -41,7 +41,6 @@ class UploadResource(Resource): self.server_name = hs.hostname self.auth = hs.get_auth() self.max_upload_size = hs.config.max_upload_size - self.version_string = hs.version_string self.clock = hs.get_clock() def render_POST(self, request): -- cgit 1.4.1 From f077e97914c9b5c82c94786130d98af52516cde0 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Mon, 14 May 2018 13:50:58 +0100 Subject: instead of inserting user daily visit data at the end of the day, instead insert incrementally through the day --- synapse/app/homeserver.py | 19 +++++++++++++--- synapse/storage/__init__.py | 54 ++++++++++++--------------------------------- 2 files changed, 30 insertions(+), 43 deletions(-) (limited to 'synapse/app/homeserver.py') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index f785a7a22b..bfc79a5e81 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -17,6 +17,7 @@ import gc import logging import os import sys +import datetime import synapse import synapse.config.logger @@ -475,9 +476,24 @@ def run(hs): " changes across releases." ) + # def recurring_user_daily_visit_stats(): + def generate_user_daily_visit_stats(): hs.get_datastore().generate_user_daily_visits() + # Since user daily stats are bucketed at midnight UTC, + # and user_ips.last_seen can be updated at any time, it is important to call + # generate_user_daily_visit_stats immediately prior to the day end. Assuming + # an hourly cadence, the simplist way is to allign all calls to the hour + # end + end_of_hour = datetime.datetime.now().replace(microsecond=0, second=0, minute=0) \ + + datetime.timedelta(hours=1) \ + - datetime.timedelta(seconds=10) # Ensure method fires before day transistion + + time_to_next_hour = end_of_hour - datetime.datetime.now() + clock.call_later(time_to_next_hour.seconds, + clock.looping_call(generate_user_daily_visit_stats, 60 * 60 * 1000)) + if hs.config.report_stats: logger.info("Scheduling stats reporting for 3 hour intervals") clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000) @@ -490,9 +506,6 @@ def run(hs): # be quite busy the first few minutes clock.call_later(5 * 60, phone_stats_home) - clock.looping_call(generate_user_daily_visit_stats, 10 * 60 * 1000) - clock.call_later(5 * 60, generate_user_daily_visit_stats) - if hs.config.daemonize and hs.config.print_pidfile: print (hs.config.pid_file) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index b51cf70336..6949876c13 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -353,48 +353,22 @@ class DataStore(RoomMemberStore, RoomStore, Generates daily visit data for use in cohort/ retention analysis """ def _generate_user_daily_visits(txn): - logger.info("Calling _generate_user_daily_visits") - # determine timestamp of previous days - yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1) - yesterday_start = datetime.datetime(yesterday.year, yesterday.month, - yesterday.day, tzinfo=tz.tzutc()) - yesterday_start_time = int(time.mktime(yesterday_start.timetuple())) * 1000 - - # Check that this job has not already been completed - sql = """ - SELECT timestamp - FROM user_daily_visits - ORDER by timestamp desc limit 1 - """ - txn.execute(sql) - row = txn.fetchone() - - # Bail if the most recent time is yesterday - if row and row[0] == yesterday_start_time: - return - - # Not specificying an upper bound means that if the update is run at - # 10 mins past midnight and the user is active during a 30 min session - # that the user is still included in the previous days stats - # This does mean that if the update is run hours late, then it is possible - # to overstate the cohort, but this seems a reasonable trade off - # The alternative is to insert on every request - but prefer to avoid - # for performance reasons - sql = """ - SELECT user_id, device_id - FROM user_ips - WHERE last_seen > ? - """ - txn.execute(sql, (yesterday_start_time,)) - user_visits = txn.fetchall() + # determine timestamp of the day start + now = datetime.datetime.utcnow() + today_start = datetime.datetime(now.year, now.month, + now.day, tzinfo=tz.tzutc()) + today_start_time = int(time.mktime(today_start.timetuple())) * 1000 + logger.info(today_start_time) sql = """ - INSERT INTO user_daily_visits (user_id, device_id, timestamp) - VALUES (?, ?, ?) - """ - - for visit in user_visits: - txn.execute(sql, (visit + (yesterday_start_time,))) + INSERT INTO user_daily_visits (user_id, device_id, timestamp) + SELECT user_id, device_id, ? + FROM user_ips AS u + LEFT JOIN user_daily_visits USING (user_id, device_id) + WHERE last_seen > ? AND timestamp IS NULL + GROUP BY user_id, device_id; + """ + txn.execute(sql, (today_start_time, today_start_time)) return self.runInteraction("generate_user_daily_visits", _generate_user_daily_visits) -- cgit 1.4.1 From 47815edcfae73c5b938f8354853a09c0b80ef27e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 11 May 2018 00:17:11 +0100 Subject: ConsentResource to gather policy consent from users Hopefully there are enough comments and docs in this that it makes sense on its own. --- docs/privacy_policy_templates/README.md | 23 +++ docs/privacy_policy_templates/en/1.0.html | 17 ++ docs/privacy_policy_templates/en/success.html | 11 ++ synapse/app/homeserver.py | 9 + synapse/config/__init__.py | 6 + synapse/config/consent_config.py | 42 +++++ synapse/config/homeserver.py | 8 +- synapse/config/key.py | 10 + synapse/http/server.py | 76 +++++++- synapse/rest/consent/__init__.py | 0 synapse/rest/consent/consent_resource.py | 210 +++++++++++++++++++++ synapse/server.py | 3 + synapse/storage/registration.py | 18 ++ .../storage/schema/delta/48/add_user_consent.sql | 18 ++ 14 files changed, 446 insertions(+), 5 deletions(-) create mode 100644 docs/privacy_policy_templates/README.md create mode 100644 docs/privacy_policy_templates/en/1.0.html create mode 100644 docs/privacy_policy_templates/en/success.html create mode 100644 synapse/config/consent_config.py create mode 100644 synapse/rest/consent/__init__.py create mode 100644 synapse/rest/consent/consent_resource.py create mode 100644 synapse/storage/schema/delta/48/add_user_consent.sql (limited to 'synapse/app/homeserver.py') diff --git a/docs/privacy_policy_templates/README.md b/docs/privacy_policy_templates/README.md new file mode 100644 index 0000000000..8e91c516b3 --- /dev/null +++ b/docs/privacy_policy_templates/README.md @@ -0,0 +1,23 @@ +If enabling the 'consent' resource in synapse, you will need some templates +for the HTML to be served to the user. This directory contains very simple +examples of the sort of thing that can be done. + +You'll need to add this sort of thing to your homeserver.yaml: + +``` +form_secret: + +user_consent: + template_dir: docs/privacy_policy_templates + default_version: 1.0 +``` + +You should then be able to enable the `consent` resource under a `listener` +entry. For example: + +``` +listeners: + - port: 8008 + resources: + - names: [client, consent] +``` diff --git a/docs/privacy_policy_templates/en/1.0.html b/docs/privacy_policy_templates/en/1.0.html new file mode 100644 index 0000000000..ab8666f0c3 --- /dev/null +++ b/docs/privacy_policy_templates/en/1.0.html @@ -0,0 +1,17 @@ + + + + Matrix.org Privacy policy + + +

+ All your base are belong to us. +

+
+ + + + +
+ + diff --git a/docs/privacy_policy_templates/en/success.html b/docs/privacy_policy_templates/en/success.html new file mode 100644 index 0000000000..d55e90c94f --- /dev/null +++ b/docs/privacy_policy_templates/en/success.html @@ -0,0 +1,11 @@ + + + + Matrix.org Privacy policy + + +

+ Sweet. +

+ + diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index a0e465d644..730271628e 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -41,6 +41,7 @@ from synapse.python_dependencies import CONDITIONAL_REQUIREMENTS, \ from synapse.replication.http import ReplicationRestResource, REPLICATION_PREFIX from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory from synapse.rest import ClientRestResource +from synapse.rest.consent.consent_resource import ConsentResource from synapse.rest.key.v1.server_key_resource import LocalKey from synapse.rest.key.v2 import KeyApiV2Resource from synapse.rest.media.v0.content_repository import ContentRepoResource @@ -182,6 +183,14 @@ class SynapseHomeServer(HomeServer): "/_matrix/client/versions": client_resource, }) + if name == "consent": + consent_resource = ConsentResource(self) + if compress: + consent_resource = gz_wrap(consent_resource) + resources.update({ + "/_matrix/consent": consent_resource, + }) + if name == "federation": resources.update({ FEDERATION_PREFIX: TransportLayerServer(self), diff --git a/synapse/config/__init__.py b/synapse/config/__init__.py index bfebb0f644..f2a5a41e92 100644 --- a/synapse/config/__init__.py +++ b/synapse/config/__init__.py @@ -12,3 +12,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +from ._base import ConfigError + +# export ConfigError if somebody does import * +# this is largely a fudge to stop PEP8 moaning about the import +__all__ = ["ConfigError"] diff --git a/synapse/config/consent_config.py b/synapse/config/consent_config.py new file mode 100644 index 0000000000..675fce0911 --- /dev/null +++ b/synapse/config/consent_config.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ._base import Config + +DEFAULT_CONFIG = """\ +# User Consent configuration +# +# uncomment and configure if enabling the 'consent' resource under 'listeners'. +# +# 'template_dir' gives the location of the templates for the HTML forms. +# This directory should contain one subdirectory per language (eg, 'en', 'fr'), +# and each language directory should contain the policy document (named as +# '.html') and a success page (success.html). +# +# 'default_version' gives the version of the policy document to serve up if +# there is no 'v' parameter. +# +# user_consent: +# template_dir: res/templates/privacy +# default_version: 1.0 +""" + + +class ConsentConfig(Config): + def read_config(self, config): + self.consent_config = config.get("user_consent") + + def default_config(self, **kwargs): + return DEFAULT_CONFIG diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index bf19cfee29..fb6bd3b421 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +13,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - from .tls import TlsConfig from .server import ServerConfig from .logger import LoggingConfig @@ -37,6 +37,7 @@ from .push import PushConfig from .spam_checker import SpamCheckerConfig from .groups import GroupsConfig from .user_directory import UserDirectoryConfig +from .consent_config import ConsentConfig class HomeServerConfig(TlsConfig, ServerConfig, DatabaseConfig, LoggingConfig, @@ -45,12 +46,13 @@ class HomeServerConfig(TlsConfig, ServerConfig, DatabaseConfig, LoggingConfig, AppServiceConfig, KeyConfig, SAML2Config, CasConfig, JWTConfig, PasswordConfig, EmailConfig, WorkerConfig, PasswordAuthProviderConfig, PushConfig, - SpamCheckerConfig, GroupsConfig, UserDirectoryConfig,): + SpamCheckerConfig, GroupsConfig, UserDirectoryConfig, + ConsentConfig): pass if __name__ == '__main__': import sys sys.stdout.write( - HomeServerConfig().generate_config(sys.argv[1], sys.argv[2])[0] + HomeServerConfig().generate_config(sys.argv[1], sys.argv[2], True)[0] ) diff --git a/synapse/config/key.py b/synapse/config/key.py index 4b8fc063d0..d1382ad9ac 100644 --- a/synapse/config/key.py +++ b/synapse/config/key.py @@ -59,14 +59,20 @@ class KeyConfig(Config): self.expire_access_token = config.get("expire_access_token", False) + # a secret which is used to calculate HMACs for form values, to stop + # falsification of values + self.form_secret = config.get("form_secret", None) + def default_config(self, config_dir_path, server_name, is_generating_file=False, **kwargs): base_key_name = os.path.join(config_dir_path, server_name) if is_generating_file: macaroon_secret_key = random_string_with_symbols(50) + form_secret = '"%s"' % random_string_with_symbols(50) else: macaroon_secret_key = None + form_secret = 'null' return """\ macaroon_secret_key: "%(macaroon_secret_key)s" @@ -74,6 +80,10 @@ class KeyConfig(Config): # Used to enable access token expiration. expire_access_token: False + # a secret which is used to calculate HMACs for form values, to stop + # falsification of values + form_secret: %(form_secret)s + ## Signing Keys ## # Path to the signing key to sign messages with diff --git a/synapse/http/server.py b/synapse/http/server.py index f29e36f490..a38209770d 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -13,7 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import cgi +from six.moves import http_client from synapse.api.errors import ( cs_exception, SynapseError, CodeMessageException, UnrecognizedRequestError, Codes @@ -44,6 +45,18 @@ import simplejson logger = logging.getLogger(__name__) +HTML_ERROR_TEMPLATE = """ + + + + Error {code} + + +

{msg}

+ + +""" + def wrap_json_request_handler(h): """Wraps a request handler method with exception handling. @@ -104,6 +117,65 @@ def wrap_json_request_handler(h): return wrap_request_handler_with_logging(wrapped_request_handler) +def wrap_html_request_handler(h): + """Wraps a request handler method with exception handling. + + Also adds logging as per wrap_request_handler_with_logging. + + The handler method must have a signature of "handle_foo(self, request)", + where "self" must have a "clock" attribute (and "request" must be a + SynapseRequest). + """ + def wrapped_request_handler(self, request): + d = defer.maybeDeferred(h, self, request) + d.addErrback(_return_html_error, request) + return d + + return wrap_request_handler_with_logging(wrapped_request_handler) + + +def _return_html_error(f, request): + """Sends an HTML error page corresponding to the given failure + + Args: + f (twisted.python.failure.Failure): + request (twisted.web.iweb.IRequest): + """ + if f.check(CodeMessageException): + cme = f.value + code = cme.code + msg = cme.msg + + if isinstance(cme, SynapseError): + logger.info( + "%s SynapseError: %s - %s", request, code, msg + ) + else: + logger.error( + "Failed handle request %r: %s", + request, + f.getTraceback().rstrip(), + ) + else: + code = http_client.INTERNAL_SERVER_ERROR + msg = "Internal server error" + + logger.error( + "Failed handle request %r: %s", + request, + f.getTraceback().rstrip(), + ) + + body = HTML_ERROR_TEMPLATE.format( + code=code, msg=cgi.escape(msg), + ).encode("utf-8") + request.setResponseCode(code) + request.setHeader(b"Content-Type", b"text/html; charset=utf-8") + request.setHeader(b"Content-Length", b"%i" % (len(body),)) + request.write(body) + finish_request(request) + + def wrap_request_handler_with_logging(h): """Wraps a request handler to provide logging and metrics @@ -134,7 +206,7 @@ def wrap_request_handler_with_logging(h): servlet_name = self.__class__.__name__ with request.processing(servlet_name): with PreserveLoggingContext(request_context): - d = h(self, request) + d = defer.maybeDeferred(h, self, request) # record the arrival of the request *after* # dispatching to the handler, so that the handler diff --git a/synapse/rest/consent/__init__.py b/synapse/rest/consent/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/synapse/rest/consent/consent_resource.py b/synapse/rest/consent/consent_resource.py new file mode 100644 index 0000000000..d791302278 --- /dev/null +++ b/synapse/rest/consent/consent_resource.py @@ -0,0 +1,210 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from hashlib import sha256 +import hmac +import logging +from os import path +from six.moves import http_client + +import jinja2 +from jinja2 import TemplateNotFound +from twisted.internet import defer +from twisted.web.resource import Resource +from twisted.web.server import NOT_DONE_YET + +from synapse.api.errors import NotFoundError, SynapseError, StoreError +from synapse.config import ConfigError +from synapse.http.server import ( + finish_request, + wrap_html_request_handler, +) +from synapse.http.servlet import parse_string +from synapse.types import UserID + + +# language to use for the templates. TODO: figure this out from Accept-Language +TEMPLATE_LANGUAGE = "en" + +logger = logging.getLogger(__name__) + +# use hmac.compare_digest if we have it (python 2.7.7), else just use equality +if hasattr(hmac, "compare_digest"): + compare_digest = hmac.compare_digest +else: + def compare_digest(a, b): + return a == b + + +class ConsentResource(Resource): + """A twisted Resource to display a privacy policy and gather consent to it + + When accessed via GET, returns the privacy policy via a template. + + When accessed via POST, records the user's consent in the database and + displays a success page. + + The config should include a template_dir setting which contains templates + for the HTML. The directory should contain one subdirectory per language + (eg, 'en', 'fr'), and each language directory should contain the policy + document (named as '.html') and a success page (success.html). + + Both forms take a set of parameters from the browser. For the POST form, + these are normally sent as form parameters (but may be query-params); for + GET requests they must be query params. These are: + + u: the complete mxid, or the localpart of the user giving their + consent. Required for both GET (where it is used as an input to the + template) and for POST (where it is used to find the row in the db + to update). + + h: hmac_sha256(secret, u), where 'secret' is the privacy_secret in the + config file. If it doesn't match, the request is 403ed. + + v: the version of the privacy policy being agreed to. + + For GET: optional, and defaults to whatever was set in the config + file. Used to choose the version of the policy to pick from the + templates directory. + + For POST: required; gives the value to be recorded in the database + against the user. + """ + def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): homeserver + """ + Resource.__init__(self) + + self.hs = hs + self.store = hs.get_datastore() + + # this is required by the request_handler wrapper + self.clock = hs.get_clock() + + consent_config = hs.config.consent_config + if consent_config is None: + raise ConfigError( + "Consent resource is enabled but user_consent section is " + "missing in config file.", + ) + + # daemonize changes the cwd to /, so make the path absolute now. + consent_template_directory = path.abspath( + consent_config["template_dir"], + ) + if not path.isdir(consent_template_directory): + raise ConfigError( + "Could not find template directory '%s'" % ( + consent_template_directory, + ), + ) + + loader = jinja2.FileSystemLoader(consent_template_directory) + self._jinja_env = jinja2.Environment(loader=loader) + + self._default_consent_verison = consent_config["default_version"] + + if hs.config.form_secret is None: + raise ConfigError( + "Consent resource is enabled but form_secret is not set in " + "config file. It should be set to an arbitrary secret string.", + ) + + self._hmac_secret = hs.config.form_secret.encode("utf-8") + + def render_GET(self, request): + self._async_render_GET(request) + return NOT_DONE_YET + + @wrap_html_request_handler + def _async_render_GET(self, request): + """ + Args: + request (twisted.web.http.Request): + """ + + version = parse_string(request, "v", + default=self._default_consent_verison) + username = parse_string(request, "u", required=True) + userhmac = parse_string(request, "h", required=True) + + self._check_hash(username, userhmac) + + try: + self._render_template( + request, "%s.html" % (version,), + user=username, userhmac=userhmac, version=version, + ) + except TemplateNotFound: + raise NotFoundError("Unknown policy version") + + def render_POST(self, request): + self._async_render_POST(request) + return NOT_DONE_YET + + @wrap_html_request_handler + @defer.inlineCallbacks + def _async_render_POST(self, request): + """ + Args: + request (twisted.web.http.Request): + """ + version = parse_string(request, "v", required=True) + username = parse_string(request, "u", required=True) + userhmac = parse_string(request, "h", required=True) + + self._check_hash(username, userhmac) + + if username.startswith('@'): + qualified_user_id = username + else: + qualified_user_id = UserID(username, self.hs.hostname).to_string() + + try: + yield self.store.user_set_consent_version(qualified_user_id, version) + except StoreError as e: + if e.code != 404: + raise + raise NotFoundError("Unknown user") + + try: + self._render_template(request, "success.html") + except TemplateNotFound: + raise NotFoundError("success.html not found") + + def _render_template(self, request, template_name, **template_args): + # get_template checks for ".." so we don't need to worry too much + # about path traversal here. + template_html = self._jinja_env.get_template( + path.join(TEMPLATE_LANGUAGE, template_name) + ) + html_bytes = template_html.render(**template_args).encode("utf8") + + request.setHeader(b"Content-Type", b"text/html; charset=utf-8") + request.setHeader(b"Content-Length", b"%i" % len(html_bytes)) + request.write(html_bytes) + finish_request(request) + + def _check_hash(self, userid, userhmac): + want_mac = hmac.new( + key=self._hmac_secret, + msg=userid, + digestmod=sha256, + ).hexdigest() + + if not compare_digest(want_mac, userhmac): + raise SynapseError(http_client.FORBIDDEN, "HMAC incorrect") diff --git a/synapse/server.py b/synapse/server.py index ebdea6b0c4..21cde5b6fc 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -97,6 +97,9 @@ class HomeServer(object): which must be implemented by the subclass. This code may call any of the required "get" methods on the instance to obtain the sub-dependencies that one requires. + + Attributes: + config (synapse.config.homeserver.HomeserverConfig): """ DEPENDENCIES = [ diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index a50717db2d..6ffc397861 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -286,6 +286,24 @@ class RegistrationStore(RegistrationWorkerStore, "user_set_password_hash", user_set_password_hash_txn ) + def user_set_consent_version(self, user_id, consent_version): + """Updates the user table to record privacy policy consent + + Args: + user_id (str): full mxid of the user to update + consent_version (str): version of the policy the user has consented + to + + Raises: + StoreError(404) if user not found + """ + return self._simple_update_one( + table='users', + keyvalues={'name': user_id, }, + updatevalues={'consent_version': consent_version, }, + desc="user_set_consent_version" + ) + def user_delete_access_tokens(self, user_id, except_token_id=None, device_id=None): """ diff --git a/synapse/storage/schema/delta/48/add_user_consent.sql b/synapse/storage/schema/delta/48/add_user_consent.sql new file mode 100644 index 0000000000..5237491506 --- /dev/null +++ b/synapse/storage/schema/delta/48/add_user_consent.sql @@ -0,0 +1,18 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* record the version of the privacy policy the user has consented to + */ +ALTER TABLE users ADD COLUMN consent_version TEXT; -- cgit 1.4.1 From 05ac15ae824cc538b869e3cc8db7af2ac22e6754 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Tue, 15 May 2018 17:01:33 +0100 Subject: Limit query load of generate_user_daily_visits The aim is to keep track of when it was last called and only query from that point in time --- synapse/app/homeserver.py | 21 ++++++---------- synapse/storage/__init__.py | 60 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 54 insertions(+), 27 deletions(-) (limited to 'synapse/app/homeserver.py') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index bfc79a5e81..f25eaf9ffc 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -476,23 +476,16 @@ def run(hs): " changes across releases." ) - # def recurring_user_daily_visit_stats(): - def generate_user_daily_visit_stats(): hs.get_datastore().generate_user_daily_visits() - # Since user daily stats are bucketed at midnight UTC, - # and user_ips.last_seen can be updated at any time, it is important to call - # generate_user_daily_visit_stats immediately prior to the day end. Assuming - # an hourly cadence, the simplist way is to allign all calls to the hour - # end - end_of_hour = datetime.datetime.now().replace(microsecond=0, second=0, minute=0) \ - + datetime.timedelta(hours=1) \ - - datetime.timedelta(seconds=10) # Ensure method fires before day transistion - - time_to_next_hour = end_of_hour - datetime.datetime.now() - clock.call_later(time_to_next_hour.seconds, - clock.looping_call(generate_user_daily_visit_stats, 60 * 60 * 1000)) + def recurring_user_daily_visit_stats(): + clock.looping_call(generate_user_daily_visit_stats, 60 * 60 * 1000) + + # Rather than update on per session basis, batch up the requests. + # If you increase the loop period, the accuracy of user_daily_visits + # table will decrease + clock.looping_call(generate_user_daily_visit_stats, 5 * 60 * 1000) if hs.config.report_stats: logger.info("Scheduling stats reporting for 3 hour intervals") diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 6949876c13..52f176a03c 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -214,6 +214,9 @@ class DataStore(RoomMemberStore, RoomStore, self._stream_order_on_start = self.get_room_max_stream_ordering() self._min_stream_order_on_start = self.get_room_min_stream_ordering() + # Used in _generate_user_daily_visits to keep track of progress + self._last_user_visit_update = self._get_start_of_day() + super(DataStore, self).__init__(db_conn, hs) def take_presence_startup_info(self): @@ -348,27 +351,58 @@ class DataStore(RoomMemberStore, RoomStore, return self.runInteraction("count_r30_users", _count_r30_users) + def _get_start_of_day(self): + """ + Returns millisecond unixtime for start of UTC day. + """ + now = datetime.datetime.utcnow() + today_start = datetime.datetime(now.year, now.month, + now.day, tzinfo=tz.tzutc()) + return int(time.mktime(today_start.timetuple())) * 1000 + def generate_user_daily_visits(self): """ Generates daily visit data for use in cohort/ retention analysis """ def _generate_user_daily_visits(txn): + logger.info("Calling _generate_user_daily_visits") + today_start = self._get_start_of_day() + a_day_in_milliseconds = 24 * 60 * 60 * 1000 - # determine timestamp of the day start - now = datetime.datetime.utcnow() - today_start = datetime.datetime(now.year, now.month, - now.day, tzinfo=tz.tzutc()) - today_start_time = int(time.mktime(today_start.timetuple())) * 1000 - logger.info(today_start_time) sql = """ INSERT INTO user_daily_visits (user_id, device_id, timestamp) - SELECT user_id, device_id, ? - FROM user_ips AS u - LEFT JOIN user_daily_visits USING (user_id, device_id) - WHERE last_seen > ? AND timestamp IS NULL - GROUP BY user_id, device_id; - """ - txn.execute(sql, (today_start_time, today_start_time)) + SELECT u.user_id, u.device_id, ? + FROM user_ips AS u + LEFT JOIN ( + SELECT user_id, device_id, timestamp FROM user_daily_visits + WHERE timestamp IS ? + ) udv + ON u.user_id = udv.user_id AND u.device_id=udv.device_id + WHERE last_seen > ? AND last_seen <= ? AND udv.timestamp IS NULL + """ + + # This means that the day has rolled over but there could still + # be entries from the previous day. There is an edge case + # where if the user logs in at 23:59 and overwrites their + # last_seen at 00:01 then they will not be counted in the + # previous day's stats - it is important that the query is run + # to minimise this case. + if today_start > self._last_user_visit_update: + yesterday_start = today_start - a_day_in_milliseconds + txn.execute(sql, (yesterday_start, yesterday_start, + self._last_user_visit_update, today_start)) + self._last_user_visit_update = today_start + + txn.execute(sql, (today_start, today_start, + self._last_user_visit_update, + today_start + a_day_in_milliseconds)) + # Update _last_user_visit_update to now. The reason to do this + # rather just clamping to the beginning of the day is to limit + # the size of the join - meaning that the query can be run more + # frequently + + now = datetime.datetime.utcnow() + self._last_user_visit_update = int(time.mktime(now.timetuple())) * 1000 return self.runInteraction("generate_user_daily_visits", _generate_user_daily_visits) -- cgit 1.4.1 From c92a8aa5780a71b5ab0227f204606004b16ff7fb Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Tue, 15 May 2018 17:31:11 +0100 Subject: pep8 --- synapse/app/homeserver.py | 1 - 1 file changed, 1 deletion(-) (limited to 'synapse/app/homeserver.py') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index f25eaf9ffc..41ee8b0a69 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -17,7 +17,6 @@ import gc import logging import os import sys -import datetime import synapse import synapse.config.logger -- cgit 1.4.1 From a2204cc9cc3f9edb9ae6c1f564b8f315e62d1978 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 16 May 2018 09:47:20 +0100 Subject: remove unused method recurring_user_daily_visit_stats --- synapse/app/homeserver.py | 3 --- 1 file changed, 3 deletions(-) (limited to 'synapse/app/homeserver.py') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 41ee8b0a69..a4648b84d5 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -478,9 +478,6 @@ def run(hs): def generate_user_daily_visit_stats(): hs.get_datastore().generate_user_daily_visits() - def recurring_user_daily_visit_stats(): - clock.looping_call(generate_user_daily_visit_stats, 60 * 60 * 1000) - # Rather than update on per session basis, batch up the requests. # If you increase the loop period, the accuracy of user_daily_visits # table will decrease -- cgit 1.4.1 From 02c1d29133a2ce3cdb5d1b23ec5edde49d813da9 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Mon, 21 May 2018 17:02:20 -0500 Subject: look at the Prometheus metrics instead --- synapse/app/homeserver.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'synapse/app/homeserver.py') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 75f40fd5a4..6393804a86 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -34,8 +34,7 @@ from synapse.module_api import ModuleApi from synapse.http.additional_resource import AdditionalResource from synapse.http.server import RootRedirect from synapse.http.site import SynapseSite -from synapse.metrics import register_memory_metrics -from synapse.metrics.resource import METRICS_PREFIX, MetricsResource +from synapse.metrics.resource import METRICS_PREFIX from synapse.python_dependencies import CONDITIONAL_REQUIREMENTS, \ check_requirements from synapse.replication.http import ReplicationRestResource, REPLICATION_PREFIX @@ -221,7 +220,8 @@ class SynapseHomeServer(HomeServer): resources[WEB_CLIENT_PREFIX] = build_resource_for_web_client(self) if name == "metrics" and self.get_config().enable_metrics: - resources[METRICS_PREFIX] = MetricsResource(self) + from prometheus_client.twisted import MetricsResource + resources[METRICS_PREFIX] = MetricsResource() if name == "replication": resources[REPLICATION_PREFIX] = ReplicationRestResource(self) @@ -353,8 +353,6 @@ def setup(config_options): hs.get_datastore().start_doing_background_updates() hs.get_federation_client().start_get_pdu_cache() - register_memory_metrics(hs) - reactor.callWhenRunning(start) return hs -- cgit 1.4.1 From b5b2d5d64b2690e250fb56e851d5556c09d13f86 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 22 May 2018 14:00:23 +0100 Subject: Fix dependency on jinja2 Delay the import of ConsentResource, so that we can get away without jinja2 if people don't have the consent resource enabled. Fixes #3259 --- synapse/app/homeserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/app/homeserver.py') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index bceb21a8d5..caccbaa814 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -41,7 +41,6 @@ from synapse.python_dependencies import CONDITIONAL_REQUIREMENTS, \ from synapse.replication.http import ReplicationRestResource, REPLICATION_PREFIX from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory from synapse.rest import ClientRestResource -from synapse.rest.consent.consent_resource import ConsentResource from synapse.rest.key.v1.server_key_resource import LocalKey from synapse.rest.key.v2 import KeyApiV2Resource from synapse.rest.media.v0.content_repository import ContentRepoResource @@ -186,6 +185,7 @@ class SynapseHomeServer(HomeServer): }) if name == "consent": + from synapse.rest.consent.consent_resource import ConsentResource consent_resource = ConsentResource(self) if compress: consent_resource = gz_wrap(consent_resource) -- cgit 1.4.1 From 85ba83eb5100abf02cf373d9a8d5010526facd45 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 22 May 2018 16:28:23 -0500 Subject: fixes --- synapse/app/homeserver.py | 6 +++-- synapse/federation/transaction_queue.py | 6 ++--- synapse/metrics/__init__.py | 12 ++++++++-- synapse/notifier.py | 6 ++--- synapse/push/httppusher.py | 4 ++-- synapse/util/caches/__init__.py | 40 ++++++++++++++++++++++++--------- synapse/util/caches/descriptors.py | 2 +- 7 files changed, 52 insertions(+), 24 deletions(-) (limited to 'synapse/app/homeserver.py') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index a5b135193f..449bfacdb9 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -34,6 +34,7 @@ from synapse.module_api import ModuleApi from synapse.http.additional_resource import AdditionalResource from synapse.http.server import RootRedirect from synapse.http.site import SynapseSite +from synapse.metrics import RegistryProxy from synapse.metrics.resource import METRICS_PREFIX from synapse.python_dependencies import CONDITIONAL_REQUIREMENTS, \ check_requirements @@ -60,6 +61,8 @@ from twisted.web.resource import EncodingResourceWrapper, NoResource from twisted.web.server import GzipEncoderFactory from twisted.web.static import File +from prometheus_client.twisted import MetricsResource + logger = logging.getLogger("synapse.app.homeserver") @@ -229,8 +232,7 @@ class SynapseHomeServer(HomeServer): resources[WEB_CLIENT_PREFIX] = build_resource_for_web_client(self) if name == "metrics" and self.get_config().enable_metrics: - from prometheus_client.twisted import MetricsResource - resources[METRICS_PREFIX] = MetricsResource() + resources[METRICS_PREFIX] = MetricsResource(RegistryProxy()) if name == "replication": resources[REPLICATION_PREFIX] = ReplicationRestResource(self) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 778924a13c..2049351fdd 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -64,7 +64,7 @@ class TransactionQueue(object): # done self.pending_transactions = {} - LaterGauge("pending_destinations", "", [], + LaterGauge("synapse_federation_client_pending_destinations", "", [], lambda: len(self.pending_transactions), ) @@ -89,11 +89,11 @@ class TransactionQueue(object): self.pending_edus_keyed_by_dest = edus_keyed = {} LaterGauge( - "pending_pdus", "", [], + "synapse_federation_client_pending_pdus", "", [], lambda: sum(map(len, pdus.values())), ) LaterGauge( - "pending_edus", "", [], + "synapse_federation_client_pending_edus", "", [], lambda: ( sum(map(len, edus.values())) + sum(map(len, presence.values())) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index ab0b921497..38408efb54 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -29,12 +29,20 @@ from twisted.internet import reactor logger = logging.getLogger(__name__) - running_on_pypy = platform.python_implementation() == 'PyPy' all_metrics = [] all_collectors = [] all_gauges = {} + +class RegistryProxy(object): + + def collect(self): + for metric in REGISTRY.collect(): + if not metric.name.startswith("__"): + yield metric + + @attr.s(hash=True) class LaterGauge(object): @@ -45,7 +53,7 @@ class LaterGauge(object): def collect(self): - g = GaugeMetricFamily(self.name, self.desc, self.labels) + g = GaugeMetricFamily(self.name, self.desc, labels=self.labels) try: calls = self.caller() diff --git a/synapse/notifier.py b/synapse/notifier.py index 123e6f1840..40cc553918 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -194,14 +194,14 @@ class Notifier(object): all_user_streams.add(x) return sum(stream.count_listeners() for stream in all_user_streams) - LaterGauge("listeners", "", [], count_listeners) + LaterGauge("synapse_notifier_listeners", "", [], count_listeners) LaterGauge( - "rooms", "", [], + "synapse_notifier_rooms", "", [], lambda: count(bool, self.room_to_user_streams.values()), ) LaterGauge( - "users", "", [], + "synapse_notifier_users", "", [], lambda: len(self.user_to_user_stream), ) diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index e22088ad6f..bf7ff74a1a 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -28,9 +28,9 @@ from prometheus_client import Counter logger = logging.getLogger(__name__) -http_push_processed_counter = Counter("http_pushes_processed", "") +http_push_processed_counter = Counter("synapse_http_httppusher_http_pushes_processed", "") -http_push_failed_counter = Counter("http_pushes_failed", "") +http_push_failed_counter = Counter("synapse_http_httppusher_http_pushes_failed", "") class HttpPusher(object): diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index 438dcddf55..1c511a7072 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from prometheus_client.core import GaugeMetricFamily, REGISTRY +from prometheus_client.core import Gauge, REGISTRY, GaugeMetricFamily import os @@ -22,10 +22,20 @@ CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.5)) caches_by_name = {} collectors_by_name = {} -def register_cache(name, cache_name, cache): +cache_size = Gauge("synapse_util_caches_cache:size", "", ["name"]) +cache_hits = Gauge("synapse_util_caches_cache:hits", "", ["name"]) +cache_evicted = Gauge("synapse_util_caches_cache:evicted_size", "", ["name"]) +cache_total = Gauge("synapse_util_caches_cache:total", "", ["name"]) + +response_cache_size = Gauge("synapse_util_caches_response_cache:size", "", ["name"]) +response_cache_hits = Gauge("synapse_util_caches_response_cache:hits", "", ["name"]) +response_cache_evicted = Gauge("synapse_util_caches_response_cache:evicted_size", "", ["name"]) +response_cache_total = Gauge("synapse_util_caches_response_cache:total", "", ["name"]) + +def register_cache(cache_type, cache_name, cache): # Check if the metric is already registered. Unregister it, if so. - metric_name = "synapse_util_caches_%s:%s" % (name, cache_name,) + metric_name = "cache_%s_%s" % (cache_type, cache_name,) if metric_name in collectors_by_name.keys(): REGISTRY.unregister(collectors_by_name[metric_name]) @@ -44,15 +54,22 @@ def register_cache(name, cache_name, cache): def inc_evictions(self, size=1): self.evicted_size += size - def collect(self): - cache_size = len(cache) + def describe(self): + return [] - gm = GaugeMetricFamily(metric_name, "", labels=["size", "hits", "misses", "total"]) - gm.add_metric(["size"], cache_size) - gm.add_metric(["hits"], self.hits) - gm.add_metric(["misses"], self.misses) - gm.add_metric(["total"], self.hits + self.misses) - yield gm + def collect(self): + if cache_type == "response_cache": + response_cache_size.labels(cache_name).set(len(cache)) + response_cache_hits.labels(cache_name).set(self.hits) + response_cache_evicted.labels(cache_name).set(self.evicted_size) + response_cache_total.labels(cache_name).set(self.hits + self.misses) + else: + cache_size.labels(cache_name).set(len(cache)) + cache_hits.labels(cache_name).set(self.hits) + cache_evicted.labels(cache_name).set(self.evicted_size) + cache_total.labels(cache_name).set(self.hits + self.misses) + + yield GaugeMetricFamily("__unused", "") metric = CacheMetric() REGISTRY.register(metric) @@ -60,6 +77,7 @@ def register_cache(name, cache_name, cache): collectors_by_name[metric_name] = metric return metric + KNOWN_KEYS = { key: key for key in ( diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index a4188eb099..8a9dcb2fc2 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -80,7 +80,7 @@ class Cache(object): self.name = name self.keylen = keylen self.thread = None - self.metrics = register_cache("descriptor", name, self.cache) + self.metrics = register_cache("cache", name, self.cache) def _on_evicted(self, evicted_count): self.metrics.inc_evictions(evicted_count) -- cgit 1.4.1