diff options
author | Olivier Wilkinson (reivilibre) <olivier@librepush.net> | 2019-08-08 11:25:02 +0100 |
---|---|---|
committer | Olivier Wilkinson (reivilibre) <olivier@librepush.net> | 2019-08-08 11:25:02 +0100 |
commit | 259014b7ad1186927dea001e9e840ede3e80c912 (patch) | |
tree | cf82623052002d22c37abe8ff631b2cc3d3864c1 | |
parent | Fix logging in workers (#5729) (diff) | |
download | synapse-259014b7ad1186927dea001e9e840ede3e80c912.tar.xz |
Schema delta for separated statistics
Separated by current/historical Signed-off-by: Olivier Wilkinson (reivilibre) <olivier@librepush.net>
-rw-r--r-- | synapse/storage/schema/delta/56/stats_separated.sql | 184 |
1 files changed, 184 insertions, 0 deletions
diff --git a/synapse/storage/schema/delta/56/stats_separated.sql b/synapse/storage/schema/delta/56/stats_separated.sql new file mode 100644 index 0000000000..5fc584cede --- /dev/null +++ b/synapse/storage/schema/delta/56/stats_separated.sql @@ -0,0 +1,184 @@ +/* Copyright 2018 New Vector Ltd + * Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +----- First clean up from previous versions of room stats. + +-- First remove old stats stuff +DROP TABLE IF EXISTS room_stats; +DROP TABLE IF EXISTS user_stats; +DROP TABLE IF EXISTS room_stats_earliest_tokens; +DROP TABLE IF EXISTS _temp_populate_stats_position; +DROP TABLE IF EXISTS _temp_populate_stats_rooms; +DROP TABLE IF EXISTS stats_stream_pos; + +-- Unschedule old background updates if they're still scheduled +DELETE FROM background_updates WHERE update_name IN ( + 'populate_stats_createtables', + 'populate_stats_process_rooms', + 'populate_stats_cleanup', + 'regen_stats' +); + +----- Create tables for our version of room stats. + +-- single-row table to track position of incremental updates +CREATE TABLE IF NOT EXISTS stats_incremental_position ( + -- the stream_id of the last-processed state delta + state_delta_stream_id BIGINT, + + -- the stream_ordering of the last-processed backfilled event + -- (this is negative) + total_events_min_stream_ordering BIGINT, + + -- the stream_ordering of the last-processed normally-created event + -- (this is positive) + total_events_max_stream_ordering BIGINT, + + -- If true, this represents the contract agreed upon by the background + -- population processor. + -- If false, this is suitable for use by the delta/incremental processor. + is_background_contract BOOLEAN NOT NULL PRIMARY KEY +); + +-- insert a null row and make sure it is the only one. +DELETE FROM stats_incremental_position; +INSERT INTO stats_incremental_position ( + state_delta_stream_id, + total_events_min_stream_ordering, + total_events_max_stream_ordering, + is_background_contract +) VALUES (NULL, NULL, NULL, FALSE), (NULL, NULL, NULL, TRUE); + +-- represents PRESENT room statistics for a room +CREATE TABLE IF NOT EXISTS room_stats_current ( + room_id TEXT NOT NULL PRIMARY KEY, + + -- These starts cover the time from start_ts…end_ts (in seconds). + -- Note that end_ts is quantised, and start_ts usually so. + start_ts BIGINT, + end_ts BIGINT, + + current_state_events INT NOT NULL DEFAULT 0, + total_events INT NOT NULL DEFAULT 0, + joined_members INT NOT NULL DEFAULT 0, + invited_members INT NOT NULL DEFAULT 0, + left_members INT NOT NULL DEFAULT 0, + banned_members INT NOT NULL DEFAULT 0, + + -- If initial background count is still to be performed: NULL + -- If initial background count has been performed: the maximum delta stream + -- position that this row takes into account. + completed_delta_stream_id BIGINT, + + CONSTRAINT timestamp_nullity_equality CHECK ((start_ts IS NULL) = (end_ts IS NULL)) +); + +-- TODO check: make it easier to find dirty rows +-- TODO check we specify 'AND start_ts IS NOT NULL' in old collector, to take +-- advantage of optimisations. +CREATE INDEX IF NOT EXISTS room_stats_current_dirty ON room_stats_current (start_ts) + WHERE start_ts IS NOT NULL; + +-- TODO check: make it easier to find incomplete rows +CREATE INDEX IF NOT EXISTS room_stats_not_complete ON room_stats_current (room_id) + WHERE completed_delta_stream_id IS NULL; + +-- represents HISTORICAL room statistics for a room +CREATE TABLE IF NOT EXISTS room_stats_historical ( + room_id TEXT NOT NULL, + -- These starts cover the time from (end_ts - bucket_size)…end_ts (in seconds). + -- Note that end_ts is quantised, and start_ts usually so. + end_ts BIGINT NOT NULL, + bucket_size INT NOT NULL, + PRIMARY KEY (room_id, end_ts), + + current_state_events INT NOT NULL, + total_events INT NOT NULL, + joined_members INT NOT NULL, + invited_members INT NOT NULL, + left_members INT NOT NULL, + banned_members INT NOT NULL +); + +-- We use this index to speed up deletion of old user stats. +CREATE INDEX IF NOT EXISTS room_stats_historical_end_ts ON room_stats_historical (end_ts); + +-- We don't need an index on (room_id, end_ts) because PRIMARY KEY sorts that +-- out for us. (We would want it to review stats for a particular room.) + + +-- represents PRESENT statistics for a user +CREATE TABLE IF NOT EXISTS user_stats_current ( + user_id TEXT NOT NULL PRIMARY KEY, + + -- The timestamp that represents the start of the + start_ts BIGINT, + end_ts BIGINT, + + public_rooms INT DEFAULT 0 NOT NULL, + private_rooms INT DEFAULT 0 NOT NULL, + + -- If initial background count is still to be performed: NULL + -- If initial background count has been performed: the maximum delta stream + -- position that this row takes into account. + completed_delta_stream_id BIGINT +); + +CREATE INDEX IF NOT EXISTS user_stats_current_dirty ON user_stats_current (start_ts) + WHERE start_ts IS NOT NULL; + +-- TODO check: make it easier to find incomplete rows +CREATE INDEX IF NOT EXISTS user_stats_not_complete ON user_stats_current (user_id) + WHERE completed_delta_stream_id IS NULL; + +-- represents HISTORICAL statistics for a user +CREATE TABLE IF NOT EXISTS user_stats_historical ( + user_id TEXT NOT NULL, + end_ts BIGINT NOT NULL, + bucket_size INT NOT NULL, + PRIMARY KEY (user_id, end_ts), + + public_rooms INT NOT NULL, + private_rooms INT NOT NULL +); + +-- We use this index to speed up deletion of old user stats. +CREATE INDEX IF NOT EXISTS user_stats_historical_end_ts ON user_stats_historical (end_ts); + +-- We don't need an index on (user_id, end_ts) because PRIMARY KEY sorts that +-- out for us. (We would want it to review stats for a particular user.) + +-- TODO old SQLites may not support partial indices + + +-- Set up staging tables +-- we depend on current_state_events_membership because this is used +-- in our counting. +INSERT INTO background_updates (update_name, progress_json) VALUES + ('populate_stats_prepare', '{}', 'current_state_events_membership'); + +-- Run through each room and update stats +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_stats_process_rooms', '{}', 'populate_stats_prepare'); + +-- Run through each user and update stats. +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_stats_process_users', '{}', 'populate_stats_process_rooms'); + +-- Clean up staging tables +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_stats_cleanup', '{}', 'populate_stats_process_users'); |