diff options
Diffstat (limited to 'synapse/storage')
-rw-r--r-- | synapse/storage/__init__.py | 28 | ||||
-rw-r--r-- | synapse/storage/client_ips.py | 23 | ||||
-rw-r--r-- | synapse/storage/monthly_active_users.py | 124 | ||||
-rw-r--r-- | synapse/storage/prepare_database.py | 2 | ||||
-rw-r--r-- | synapse/storage/schema/delta/51/monthly_active_users.sql | 23 |
5 files changed, 172 insertions, 28 deletions
diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 134e4a80f1..23b4a8d76d 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -39,6 +39,7 @@ from .filtering import FilteringStore from .group_server import GroupServerStore from .keys import KeyStore from .media_repository import MediaRepositoryStore +from .monthly_active_users import MonthlyActiveUsersStore from .openid import OpenIdStore from .presence import PresenceStore, UserPresenceState from .profile import ProfileStore @@ -87,6 +88,7 @@ class DataStore(RoomMemberStore, RoomStore, UserDirectoryStore, GroupServerStore, UserErasureStore, + MonthlyActiveUsersStore, ): def __init__(self, db_conn, hs): @@ -94,7 +96,6 @@ class DataStore(RoomMemberStore, RoomStore, self._clock = hs.get_clock() self.database_engine = hs.database_engine - self.db_conn = db_conn self._stream_id_gen = StreamIdGenerator( db_conn, "events", "stream_ordering", extra_tables=[("local_invites", "stream_id")] @@ -267,31 +268,6 @@ class DataStore(RoomMemberStore, RoomStore, return self.runInteraction("count_users", _count_users) - def count_monthly_users(self): - """Counts the number of users who used this homeserver in the last 30 days - - This method should be refactored with count_daily_users - the only - reason not to is waiting on definition of mau - - Returns: - Defered[int] - """ - def _count_monthly_users(txn): - thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30) - sql = """ - SELECT COALESCE(count(*), 0) FROM ( - SELECT user_id FROM user_ips - WHERE last_seen > ? - GROUP BY user_id - ) u - """ - - txn.execute(sql, (thirty_days_ago,)) - count, = txn.fetchone() - return count - - return self.runInteraction("count_monthly_users", _count_monthly_users) - def count_r30_users(self): """ Counts the number of 30 day retained users, defined as:- diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index b8cefd43d6..83d64d1563 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -35,6 +35,7 @@ LAST_SEEN_GRANULARITY = 120 * 1000 class ClientIpStore(background_updates.BackgroundUpdateStore): def __init__(self, db_conn, hs): + self.client_ip_last_seen = Cache( name="client_ip_last_seen", keylen=4, @@ -74,6 +75,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): "before", "shutdown", self._update_client_ips_batch ) + @defer.inlineCallbacks def insert_client_ip(self, user_id, access_token, ip, user_agent, device_id, now=None): if not now: @@ -84,7 +86,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): last_seen = self.client_ip_last_seen.get(key) except KeyError: last_seen = None - + yield self._populate_monthly_active_users(user_id) # Rate-limited inserts if last_seen is not None and (now - last_seen) < LAST_SEEN_GRANULARITY: return @@ -93,6 +95,25 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): self._batch_row_update[key] = (user_agent, device_id, now) + @defer.inlineCallbacks + def _populate_monthly_active_users(self, user_id): + """Checks on the state of monthly active user limits and optionally + add the user to the monthly active tables + + Args: + user_id(str): the user_id to query + """ + + store = self.hs.get_datastore() + if self.hs.config.limit_usage_by_mau: + is_user_monthly_active = yield store.is_user_monthly_active(user_id) + if is_user_monthly_active: + yield store.upsert_monthly_active_user(user_id) + else: + count = yield store.get_monthly_active_count() + if count < self.hs.config.max_mau_value: + yield store.upsert_monthly_active_user(user_id) + def _update_client_ips_batch(self): def update(): to_update = self._batch_row_update diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py new file mode 100644 index 0000000000..2872ba4cae --- /dev/null +++ b/synapse/storage/monthly_active_users.py @@ -0,0 +1,124 @@ +from twisted.internet import defer +from synapse.util.caches.descriptors import cachedInlineCallbacks +from synapse.storage.engines import PostgresEngine, Sqlite3Engine + +from ._base import SQLBaseStore + + +class MonthlyActiveUsersStore(SQLBaseStore): + def __init__(self, dbconn, hs): + super(MonthlyActiveUsersStore, self).__init__(None, hs) + self._clock = hs.get_clock() + self.max_mau_value = hs.config.max_mau_value + + def reap_monthly_active_users(self): + """ + Cleans out monthly active user table to ensure that no stale + entries exist. + Return: + Defered() + """ + def _reap_users(txn): + thirty_days_ago = ( + int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30) + ) + + if isinstance(self.database_engine, PostgresEngine): + sql = """ + DELETE FROM monthly_active_users + WHERE timestamp < ? + RETURNING user_id + """ + txn.execute(sql, (thirty_days_ago,)) + res = txn.fetchall() + for r in res: + self.is_user_monthly_active.invalidate(r) + + sql = """ + DELETE FROM monthly_active_users + ORDER BY timestamp desc + LIMIT -1 OFFSET ? + RETURNING user_id + """ + txn.execute(sql, (self.max_mau_value,)) + res = txn.fetchall() + for r in res: + self.is_user_monthly_active.invalidate(r) + print r + self.get_monthly_active_count.invalidate() + elif isinstance(self.database_engine, Sqlite3Engine): + sql = "DELETE FROM monthly_active_users WHERE timestamp < ?" + + txn.execute(sql, (thirty_days_ago,)) + sql = """ + DELETE FROM monthly_active_users + ORDER BY timestamp desc + LIMIT -1 OFFSET ? + """ + txn.execute(sql, (self.max_mau_value,)) + + # It seems poor to invalidate the whole cache, but the alternative + # is to select then delete which has its own problems. + # It seems unlikely that anyone using this feature on large datasets + # would be using sqlite and if they are then there will be + # larger perf issues than this one to encourage an upgrade to postgres. + + self.is_user_monthly_active.invalidate_all() + self.get_monthly_active_count.invalidate_all() + + return self.runInteraction("reap_monthly_active_users", _reap_users) + + @cachedInlineCallbacks(num_args=0) + def get_monthly_active_count(self): + """ + Generates current count of monthly active users.abs + Return: + Defered(int): Number of current monthly active users + """ + def _count_users(txn): + sql = "SELECT COALESCE(count(*), 0) FROM monthly_active_users" + + txn.execute(sql) + count, = txn.fetchone() + return count + return self.runInteraction("count_users", _count_users) + + def upsert_monthly_active_user(self, user_id): + """ + Updates or inserts monthly active user member + Arguments: + user_id (str): user to add/update + Deferred(bool): True if a new entry was created, False if an + existing one was updated. + """ + return self._simple_upsert( + desc="upsert_monthly_active_user", + table="monthly_active_users", + keyvalues={ + "user_id": user_id, + }, + values={ + "timestamp": int(self._clock.time_msec()), + }, + lock=False, + ) + + @cachedInlineCallbacks(num_args=1) + def is_user_monthly_active(self, user_id): + """ + Checks if a given user is part of the monthly active user group + Arguments: + user_id (str): user to add/update + Return: + bool : True if user part of group, False otherwise + """ + user_present = yield self._simple_select_onecol( + table="monthly_active_users", + keyvalues={ + "user_id": user_id, + }, + retcol="user_id", + desc="is_user_monthly_active", + ) + + defer.returnValue(bool(user_present)) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index b290f834b3..b364719312 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 50 +SCHEMA_VERSION = 51 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/51/monthly_active_users.sql b/synapse/storage/schema/delta/51/monthly_active_users.sql new file mode 100644 index 0000000000..f2b6d3e31e --- /dev/null +++ b/synapse/storage/schema/delta/51/monthly_active_users.sql @@ -0,0 +1,23 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- a table of monthly active users, for use where blocking based on mau limits +CREATE TABLE monthly_active_users ( + user_id TEXT NOT NULL, + timestamp BIGINT NOT NULL +); + +CREATE UNIQUE INDEX monthly_active_users_users ON monthly_active_users(user_id); +CREATE INDEX monthly_active_users_time_stamp ON monthly_active_users(timestamp); |