summary refs log tree commit diff
path: root/synapse/storage/schema
diff options
context:
space:
mode:
authorOlivier Wilkinson (reivilibre) <olivier@librepush.net>2019-08-20 13:45:05 +0100
committerOlivier Wilkinson (reivilibre) <olivier@librepush.net>2019-08-20 13:45:05 +0100
commitd7675e79e13c1fd44dd97033d1e69f57811d75d1 (patch)
treea7994a97a5986bc4519965d4e9d4673e4fd18538 /synapse/storage/schema
parentTear out current room & user statistics (#5880) (diff)
downloadsynapse-d7675e79e13c1fd44dd97033d1e69f57811d75d1.tar.xz
Add schema for Separated Statistics
Signed-off-by: Olivier Wilkinson (reivilibre) <olivier@librepush.net>
Diffstat (limited to 'synapse/storage/schema')
-rw-r--r--synapse/storage/schema/delta/56/stats_separated1.sql115
-rw-r--r--synapse/storage/schema/delta/56/stats_separated2.py87
2 files changed, 202 insertions, 0 deletions
diff --git a/synapse/storage/schema/delta/56/stats_separated1.sql b/synapse/storage/schema/delta/56/stats_separated1.sql
index 5b125d17b0..8d8e8fb97c 100644
--- a/synapse/storage/schema/delta/56/stats_separated1.sql
+++ b/synapse/storage/schema/delta/56/stats_separated1.sql
@@ -31,3 +31,118 @@ DELETE FROM background_updates WHERE update_name IN (
     'populate_stats_process_rooms',
     'populate_stats_cleanup'
 );
+
+----- Create tables for our version of room stats.
+
+-- single-row table to track position of incremental updates
+CREATE TABLE IF NOT EXISTS stats_incremental_position (
+    -- the stream_id of the last-processed state delta
+    state_delta_stream_id BIGINT,
+
+    -- the stream_ordering of the last-processed backfilled event
+    -- (this is negative)
+    total_events_min_stream_ordering BIGINT,
+
+    -- the stream_ordering of the last-processed normally-created event
+    -- (this is positive)
+    total_events_max_stream_ordering BIGINT,
+
+    -- If true, this represents the contract agreed upon by the background
+    -- population processor.
+    -- If false, this is suitable for use by the delta/incremental processor.
+    is_background_contract BOOLEAN NOT NULL PRIMARY KEY
+);
+
+-- insert a null row and make sure it is the only one.
+DELETE FROM stats_incremental_position;
+INSERT INTO stats_incremental_position (
+    state_delta_stream_id,
+    total_events_min_stream_ordering,
+    total_events_max_stream_ordering,
+    is_background_contract
+) VALUES (NULL, NULL, NULL, (0 = 1)), (NULL, NULL, NULL, (1 = 1));
+
+-- represents PRESENT room statistics for a room
+CREATE TABLE IF NOT EXISTS room_stats_current (
+    room_id TEXT NOT NULL PRIMARY KEY,
+
+    -- These starts cover the time from start_ts...end_ts (in seconds).
+    -- Note that end_ts is quantised, and start_ts usually so.
+    start_ts BIGINT,
+    end_ts BIGINT,
+
+    current_state_events INT NOT NULL DEFAULT 0,
+    total_events INT NOT NULL DEFAULT 0,
+    joined_members INT NOT NULL DEFAULT 0,
+    invited_members INT NOT NULL DEFAULT 0,
+    left_members INT NOT NULL DEFAULT 0,
+    banned_members INT NOT NULL DEFAULT 0,
+
+    -- If initial background count is still to be performed: NULL
+    -- If initial background count has been performed: the maximum delta stream
+    --  position that this row takes into account.
+    completed_delta_stream_id BIGINT,
+
+    CONSTRAINT timestamp_nullity_equality CHECK ((start_ts IS NULL) = (end_ts IS NULL))
+);
+
+
+-- represents HISTORICAL room statistics for a room
+CREATE TABLE IF NOT EXISTS room_stats_historical (
+    room_id TEXT NOT NULL,
+    -- These stats cover the time from (end_ts - bucket_size)...end_ts (in seconds).
+    -- Note that end_ts is quantised, and start_ts usually so.
+    end_ts BIGINT NOT NULL,
+    bucket_size INT NOT NULL,
+
+    current_state_events INT NOT NULL,
+    total_events INT NOT NULL,
+    joined_members INT NOT NULL,
+    invited_members INT NOT NULL,
+    left_members INT NOT NULL,
+    banned_members INT NOT NULL,
+
+    PRIMARY KEY (room_id, end_ts)
+);
+
+-- We use this index to speed up deletion of ancient room stats.
+CREATE INDEX IF NOT EXISTS room_stats_historical_end_ts ON room_stats_historical (end_ts);
+
+-- We don't need an index on (room_id, end_ts) because PRIMARY KEY sorts that
+-- out for us. (We would want it to review stats for a particular room.)
+
+
+-- represents PRESENT statistics for a user
+CREATE TABLE IF NOT EXISTS user_stats_current (
+    user_id TEXT NOT NULL PRIMARY KEY,
+
+    -- The timestamp that represents the start of the
+    start_ts BIGINT,
+    end_ts BIGINT,
+
+    public_rooms INT DEFAULT 0 NOT NULL,
+    private_rooms INT DEFAULT 0 NOT NULL,
+
+    -- If initial background count is still to be performed: NULL
+    -- If initial background count has been performed: the maximum delta stream
+    --  position that this row takes into account.
+    completed_delta_stream_id BIGINT
+);
+
+-- represents HISTORICAL statistics for a user
+CREATE TABLE IF NOT EXISTS user_stats_historical (
+    user_id TEXT NOT NULL,
+    end_ts BIGINT NOT NULL,
+    bucket_size INT NOT NULL,
+
+    public_rooms INT NOT NULL,
+    private_rooms INT NOT NULL,
+
+    PRIMARY KEY (user_id, end_ts)
+);
+
+-- We use this index to speed up deletion of ancient user stats.
+CREATE INDEX IF NOT EXISTS user_stats_historical_end_ts ON user_stats_historical (end_ts);
+
+-- We don't need an index on (user_id, end_ts) because PRIMARY KEY sorts that
+-- out for us. (We would want it to review stats for a particular user.)
diff --git a/synapse/storage/schema/delta/56/stats_separated2.py b/synapse/storage/schema/delta/56/stats_separated2.py
new file mode 100644
index 0000000000..049867fa3e
--- /dev/null
+++ b/synapse/storage/schema/delta/56/stats_separated2.py
@@ -0,0 +1,87 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This schema delta will be run after 'stats_separated1.sql' due to lexicographic
+# ordering. Note that it MUST be so.
+from synapse.storage.engines import PostgresEngine, Sqlite3Engine
+
+
+def _run_create_generic(stats_type, cursor, database_engine):
+    """
+    Creates the pertinent (partial, if supported) indices for one kind of stats.
+    Args:
+        stats_type: "room" or "user" - the type of stats
+        cursor: Database Cursor
+        database_engine: Database Engine
+    """
+    if isinstance(database_engine, Sqlite3Engine):
+        # even though SQLite >= 3.8 can support partial indices, we won't enable
+        # them, in case the SQLite database may be later used on another system.
+        # It's also the case that SQLite is only likely to be used in small
+        # deployments or testing, where the optimisations gained by use of a
+        # partial index are not a big concern.
+        cursor.execute(
+            """
+                CREATE INDEX IF NOT EXISTS %s_stats_current_dirty
+                    ON %s_stats_current (end_ts);
+            """
+            % (stats_type, stats_type)
+        )
+        cursor.execute(
+            """
+                CREATE INDEX IF NOT EXISTS %s_stats_not_complete
+                    ON %s_stats_current (completed_delta_stream_id, %s_id);
+            """
+            % (stats_type, stats_type, stats_type)
+        )
+    elif isinstance(database_engine, PostgresEngine):
+        # This partial index helps us with finding dirty stats rows
+        cursor.execute(
+            """
+                CREATE INDEX IF NOT EXISTS %s_stats_current_dirty
+                    ON %s_stats_current (end_ts)
+                    WHERE end_ts IS NOT NULL;
+            """
+            % (stats_type, stats_type)
+        )
+        # This partial index helps us with old collection
+        cursor.execute(
+            """
+                CREATE INDEX IF NOT EXISTS %s_stats_not_complete
+                    ON %s_stats_current (%s_id)
+                    WHERE completed_delta_stream_id IS NULL;
+            """
+            % (stats_type, stats_type, stats_type)
+        )
+    else:
+        raise NotImplementedError("Unknown database engine.")
+
+
+def run_create(cursor, database_engine):
+    """
+    This function is called as part of the schema delta.
+    It will create indices - partial, if supported - for the new 'separated'
+    room & user statistics.
+    """
+    _run_create_generic("room", cursor, database_engine)
+    _run_create_generic("user", cursor, database_engine)
+
+
+def run_upgrade(cur, database_engine, config):
+    """
+    This function is run on a database upgrade (of a non-empty database).
+    We have no need to do anything specific here.
+    """
+    pass