summary refs log tree commit diff
diff options
context:
space:
mode:
authorOlivier Wilkinson (reivilibre) <olivier@librepush.net>2019-08-08 11:25:02 +0100
committerOlivier Wilkinson (reivilibre) <olivier@librepush.net>2019-08-08 11:25:02 +0100
commit259014b7ad1186927dea001e9e840ede3e80c912 (patch)
treecf82623052002d22c37abe8ff631b2cc3d3864c1
parentFix logging in workers (#5729) (diff)
downloadsynapse-259014b7ad1186927dea001e9e840ede3e80c912.tar.xz
Schema delta for separated statistics
Separated by current/historical

Signed-off-by: Olivier Wilkinson (reivilibre) <olivier@librepush.net>
-rw-r--r--synapse/storage/schema/delta/56/stats_separated.sql184
1 files changed, 184 insertions, 0 deletions
diff --git a/synapse/storage/schema/delta/56/stats_separated.sql b/synapse/storage/schema/delta/56/stats_separated.sql
new file mode 100644
index 0000000000..5fc584cede
--- /dev/null
+++ b/synapse/storage/schema/delta/56/stats_separated.sql
@@ -0,0 +1,184 @@
+/* Copyright 2018 New Vector Ltd
+ * Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+----- First clean up from previous versions of room stats.
+
+-- First remove old stats stuff
+DROP TABLE IF EXISTS room_stats;
+DROP TABLE IF EXISTS user_stats;
+DROP TABLE IF EXISTS room_stats_earliest_tokens;
+DROP TABLE IF EXISTS _temp_populate_stats_position;
+DROP TABLE IF EXISTS _temp_populate_stats_rooms;
+DROP TABLE IF EXISTS stats_stream_pos;
+
+-- Unschedule old background updates if they're still scheduled
+DELETE FROM background_updates WHERE update_name IN (
+    'populate_stats_createtables',
+    'populate_stats_process_rooms',
+    'populate_stats_cleanup',
+    'regen_stats'
+);
+
+----- Create tables for our version of room stats.
+
+-- single-row table to track position of incremental updates
+CREATE TABLE IF NOT EXISTS stats_incremental_position (
+    -- the stream_id of the last-processed state delta
+    state_delta_stream_id BIGINT,
+
+    -- the stream_ordering of the last-processed backfilled event
+    -- (this is negative)
+    total_events_min_stream_ordering BIGINT,
+
+    -- the stream_ordering of the last-processed normally-created event
+    -- (this is positive)
+    total_events_max_stream_ordering BIGINT,
+
+    -- If true, this represents the contract agreed upon by the background
+    -- population processor.
+    -- If false, this is suitable for use by the delta/incremental processor.
+    is_background_contract BOOLEAN NOT NULL PRIMARY KEY
+);
+
+-- insert a null row and make sure it is the only one.
+DELETE FROM stats_incremental_position;
+INSERT INTO stats_incremental_position (
+    state_delta_stream_id,
+    total_events_min_stream_ordering,
+    total_events_max_stream_ordering,
+    is_background_contract
+) VALUES (NULL, NULL, NULL, FALSE), (NULL, NULL, NULL, TRUE);
+
+-- represents PRESENT room statistics for a room
+CREATE TABLE IF NOT EXISTS room_stats_current (
+    room_id TEXT NOT NULL PRIMARY KEY,
+
+    -- These starts cover the time from start_ts…end_ts (in seconds).
+    -- Note that end_ts is quantised, and start_ts usually so.
+    start_ts BIGINT,
+    end_ts BIGINT,
+
+    current_state_events INT NOT NULL DEFAULT 0,
+    total_events INT NOT NULL DEFAULT 0,
+    joined_members INT NOT NULL DEFAULT 0,
+    invited_members INT NOT NULL DEFAULT 0,
+    left_members INT NOT NULL DEFAULT 0,
+    banned_members INT NOT NULL DEFAULT 0,
+
+    -- If initial background count is still to be performed: NULL
+    -- If initial background count has been performed: the maximum delta stream
+    --  position that this row takes into account.
+    completed_delta_stream_id BIGINT,
+
+    CONSTRAINT timestamp_nullity_equality CHECK ((start_ts IS NULL) = (end_ts IS NULL))
+);
+
+-- TODO check: make it easier to find dirty rows
+-- TODO check we specify 'AND start_ts IS NOT NULL' in old collector, to take
+-- advantage of optimisations.
+CREATE INDEX IF NOT EXISTS room_stats_current_dirty ON room_stats_current (start_ts)
+    WHERE start_ts IS NOT NULL;
+
+-- TODO check: make it easier to find incomplete rows
+CREATE INDEX IF NOT EXISTS room_stats_not_complete ON room_stats_current (room_id)
+    WHERE completed_delta_stream_id IS NULL;
+
+-- represents HISTORICAL room statistics for a room
+CREATE TABLE IF NOT EXISTS room_stats_historical (
+    room_id TEXT NOT NULL,
+    -- These starts cover the time from (end_ts - bucket_size)…end_ts (in seconds).
+    -- Note that end_ts is quantised, and start_ts usually so.
+    end_ts BIGINT NOT NULL,
+    bucket_size INT NOT NULL,
+    PRIMARY KEY (room_id, end_ts),
+
+    current_state_events INT NOT NULL,
+    total_events INT NOT NULL,
+    joined_members INT NOT NULL,
+    invited_members INT NOT NULL,
+    left_members INT NOT NULL,
+    banned_members INT NOT NULL
+);
+
+-- We use this index to speed up deletion of old user stats.
+CREATE INDEX IF NOT EXISTS room_stats_historical_end_ts ON room_stats_historical (end_ts);
+
+-- We don't need an index on (room_id, end_ts) because PRIMARY KEY sorts that
+-- out for us. (We would want it to review stats for a particular room.)
+
+
+-- represents PRESENT statistics for a user
+CREATE TABLE IF NOT EXISTS user_stats_current (
+    user_id TEXT NOT NULL PRIMARY KEY,
+
+    -- The timestamp that represents the start of the
+    start_ts BIGINT,
+    end_ts BIGINT,
+
+    public_rooms INT DEFAULT 0 NOT NULL,
+    private_rooms INT DEFAULT 0 NOT NULL,
+
+    -- If initial background count is still to be performed: NULL
+    -- If initial background count has been performed: the maximum delta stream
+    --  position that this row takes into account.
+    completed_delta_stream_id BIGINT
+);
+
+CREATE INDEX IF NOT EXISTS user_stats_current_dirty ON user_stats_current (start_ts)
+    WHERE start_ts IS NOT NULL;
+
+-- TODO check: make it easier to find incomplete rows
+CREATE INDEX IF NOT EXISTS user_stats_not_complete ON user_stats_current (user_id)
+    WHERE completed_delta_stream_id IS NULL;
+
+-- represents HISTORICAL statistics for a user
+CREATE TABLE IF NOT EXISTS user_stats_historical (
+    user_id TEXT NOT NULL,
+    end_ts BIGINT NOT NULL,
+    bucket_size INT NOT NULL,
+    PRIMARY KEY (user_id, end_ts),
+
+    public_rooms INT NOT NULL,
+    private_rooms INT NOT NULL
+);
+
+-- We use this index to speed up deletion of old user stats.
+CREATE INDEX IF NOT EXISTS user_stats_historical_end_ts ON user_stats_historical (end_ts);
+
+-- We don't need an index on (user_id, end_ts) because PRIMARY KEY sorts that
+-- out for us. (We would want it to review stats for a particular user.)
+
+-- TODO old SQLites may not support partial indices
+
+
+-- Set up staging tables
+-- we depend on current_state_events_membership because this is used
+-- in our counting.
+INSERT INTO background_updates (update_name, progress_json) VALUES
+    ('populate_stats_prepare', '{}', 'current_state_events_membership');
+
+-- Run through each room and update stats
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+    ('populate_stats_process_rooms', '{}', 'populate_stats_prepare');
+
+-- Run through each user and update stats.
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+    ('populate_stats_process_users', '{}', 'populate_stats_process_rooms');
+
+-- Clean up staging tables
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+    ('populate_stats_cleanup', '{}', 'populate_stats_process_users');