From 314567d62d344492d867ea6587e3e7ad5470930d Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Mon, 12 Aug 2019 16:21:35 +0100 Subject: Split out partial indices from theschema delta, thus supporting SQLite. Signed-off-by: Olivier Wilkinson (reivilibre) --- .../storage/schema/delta/56/stats_separated.sql | 184 --------------------- .../storage/schema/delta/56/stats_separated1.sql | 182 ++++++++++++++++++++ .../storage/schema/delta/56/stats_separated2.py | 87 ++++++++++ 3 files changed, 269 insertions(+), 184 deletions(-) delete mode 100644 synapse/storage/schema/delta/56/stats_separated.sql create mode 100644 synapse/storage/schema/delta/56/stats_separated1.sql create mode 100644 synapse/storage/schema/delta/56/stats_separated2.py (limited to 'synapse') diff --git a/synapse/storage/schema/delta/56/stats_separated.sql b/synapse/storage/schema/delta/56/stats_separated.sql deleted file mode 100644 index 0e80c9f967..0000000000 --- a/synapse/storage/schema/delta/56/stats_separated.sql +++ /dev/null @@ -1,184 +0,0 @@ -/* Copyright 2018 New Vector Ltd - * Copyright 2019 The Matrix.org Foundation C.I.C. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - ------ First clean up from previous versions of room stats. - --- First remove old stats stuff -DROP TABLE IF EXISTS room_stats; -DROP TABLE IF EXISTS user_stats; -DROP TABLE IF EXISTS room_stats_earliest_tokens; -DROP TABLE IF EXISTS _temp_populate_stats_position; -DROP TABLE IF EXISTS _temp_populate_stats_rooms; -DROP TABLE IF EXISTS stats_stream_pos; - --- Unschedule old background updates if they're still scheduled -DELETE FROM background_updates WHERE update_name IN ( - 'populate_stats_createtables', - 'populate_stats_process_rooms', - 'populate_stats_cleanup', - 'regen_stats' -); - ------ Create tables for our version of room stats. - --- single-row table to track position of incremental updates -CREATE TABLE IF NOT EXISTS stats_incremental_position ( - -- the stream_id of the last-processed state delta - state_delta_stream_id BIGINT, - - -- the stream_ordering of the last-processed backfilled event - -- (this is negative) - total_events_min_stream_ordering BIGINT, - - -- the stream_ordering of the last-processed normally-created event - -- (this is positive) - total_events_max_stream_ordering BIGINT, - - -- If true, this represents the contract agreed upon by the background - -- population processor. - -- If false, this is suitable for use by the delta/incremental processor. - is_background_contract BOOLEAN NOT NULL PRIMARY KEY -); - --- insert a null row and make sure it is the only one. -DELETE FROM stats_incremental_position; -INSERT INTO stats_incremental_position ( - state_delta_stream_id, - total_events_min_stream_ordering, - total_events_max_stream_ordering, - is_background_contract -) VALUES (NULL, NULL, NULL, FALSE), (NULL, NULL, NULL, TRUE); - --- represents PRESENT room statistics for a room -CREATE TABLE IF NOT EXISTS room_stats_current ( - room_id TEXT NOT NULL PRIMARY KEY, - - -- These starts cover the time from start_ts…end_ts (in seconds). - -- Note that end_ts is quantised, and start_ts usually so. - start_ts BIGINT, - end_ts BIGINT, - - current_state_events INT NOT NULL DEFAULT 0, - total_events INT NOT NULL DEFAULT 0, - joined_members INT NOT NULL DEFAULT 0, - invited_members INT NOT NULL DEFAULT 0, - left_members INT NOT NULL DEFAULT 0, - banned_members INT NOT NULL DEFAULT 0, - - -- If initial background count is still to be performed: NULL - -- If initial background count has been performed: the maximum delta stream - -- position that this row takes into account. - completed_delta_stream_id BIGINT, - - CONSTRAINT timestamp_nullity_equality CHECK ((start_ts IS NULL) = (end_ts IS NULL)) -); - --- TODO check: make it easier to find dirty rows --- TODO check we specify 'AND start_ts IS NOT NULL' in old collector, to take --- advantage of optimisations. -CREATE INDEX IF NOT EXISTS room_stats_current_dirty ON room_stats_current (start_ts) - WHERE start_ts IS NOT NULL; - --- TODO check: make it easier to find incomplete rows -CREATE INDEX IF NOT EXISTS room_stats_not_complete ON room_stats_current (room_id) - WHERE completed_delta_stream_id IS NULL; - --- represents HISTORICAL room statistics for a room -CREATE TABLE IF NOT EXISTS room_stats_historical ( - room_id TEXT NOT NULL, - -- These starts cover the time from (end_ts - bucket_size)…end_ts (in seconds). - -- Note that end_ts is quantised, and start_ts usually so. - end_ts BIGINT NOT NULL, - bucket_size INT NOT NULL, - PRIMARY KEY (room_id, end_ts), - - current_state_events INT NOT NULL, - total_events INT NOT NULL, - joined_members INT NOT NULL, - invited_members INT NOT NULL, - left_members INT NOT NULL, - banned_members INT NOT NULL -); - --- We use this index to speed up deletion of old user stats. -CREATE INDEX IF NOT EXISTS room_stats_historical_end_ts ON room_stats_historical (end_ts); - --- We don't need an index on (room_id, end_ts) because PRIMARY KEY sorts that --- out for us. (We would want it to review stats for a particular room.) - - --- represents PRESENT statistics for a user -CREATE TABLE IF NOT EXISTS user_stats_current ( - user_id TEXT NOT NULL PRIMARY KEY, - - -- The timestamp that represents the start of the - start_ts BIGINT, - end_ts BIGINT, - - public_rooms INT DEFAULT 0 NOT NULL, - private_rooms INT DEFAULT 0 NOT NULL, - - -- If initial background count is still to be performed: NULL - -- If initial background count has been performed: the maximum delta stream - -- position that this row takes into account. - completed_delta_stream_id BIGINT -); - -CREATE INDEX IF NOT EXISTS user_stats_current_dirty ON user_stats_current (start_ts) - WHERE start_ts IS NOT NULL; - --- TODO check: make it easier to find incomplete rows -CREATE INDEX IF NOT EXISTS user_stats_not_complete ON user_stats_current (user_id) - WHERE completed_delta_stream_id IS NULL; - --- represents HISTORICAL statistics for a user -CREATE TABLE IF NOT EXISTS user_stats_historical ( - user_id TEXT NOT NULL, - end_ts BIGINT NOT NULL, - bucket_size INT NOT NULL, - PRIMARY KEY (user_id, end_ts), - - public_rooms INT NOT NULL, - private_rooms INT NOT NULL -); - --- We use this index to speed up deletion of old user stats. -CREATE INDEX IF NOT EXISTS user_stats_historical_end_ts ON user_stats_historical (end_ts); - --- We don't need an index on (user_id, end_ts) because PRIMARY KEY sorts that --- out for us. (We would want it to review stats for a particular user.) - --- TODO old SQLites may not support partial indices - - --- Set up staging tables --- we depend on current_state_events_membership because this is used --- in our counting. -INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES - ('populate_stats_prepare', '{}', 'current_state_events_membership'); - --- Run through each room and update stats -INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES - ('populate_stats_process_rooms', '{}', 'populate_stats_prepare'); - --- Run through each user and update stats. -INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES - ('populate_stats_process_users', '{}', 'populate_stats_process_rooms'); - --- Clean up staging tables -INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES - ('populate_stats_cleanup', '{}', 'populate_stats_process_users'); diff --git a/synapse/storage/schema/delta/56/stats_separated1.sql b/synapse/storage/schema/delta/56/stats_separated1.sql new file mode 100644 index 0000000000..26606b2b60 --- /dev/null +++ b/synapse/storage/schema/delta/56/stats_separated1.sql @@ -0,0 +1,182 @@ +/* Copyright 2018 New Vector Ltd + * Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +----- First clean up from previous versions of room stats. + +-- First remove old stats stuff +DROP TABLE IF EXISTS room_stats; +DROP TABLE IF EXISTS user_stats; +DROP TABLE IF EXISTS room_stats_earliest_tokens; +DROP TABLE IF EXISTS _temp_populate_stats_position; +DROP TABLE IF EXISTS _temp_populate_stats_rooms; +DROP TABLE IF EXISTS stats_stream_pos; + +-- Unschedule old background updates if they're still scheduled +DELETE FROM background_updates WHERE update_name IN ( + 'populate_stats_createtables', + 'populate_stats_process_rooms', + 'populate_stats_cleanup', + 'regen_stats' +); + +----- Create tables for our version of room stats. + +-- single-row table to track position of incremental updates +CREATE TABLE IF NOT EXISTS stats_incremental_position ( + -- the stream_id of the last-processed state delta + state_delta_stream_id BIGINT, + + -- the stream_ordering of the last-processed backfilled event + -- (this is negative) + total_events_min_stream_ordering BIGINT, + + -- the stream_ordering of the last-processed normally-created event + -- (this is positive) + total_events_max_stream_ordering BIGINT, + + -- If true, this represents the contract agreed upon by the background + -- population processor. + -- If false, this is suitable for use by the delta/incremental processor. + is_background_contract BOOLEAN NOT NULL PRIMARY KEY +); + +-- insert a null row and make sure it is the only one. +DELETE FROM stats_incremental_position; +INSERT INTO stats_incremental_position ( + state_delta_stream_id, + total_events_min_stream_ordering, + total_events_max_stream_ordering, + is_background_contract +) VALUES (NULL, NULL, NULL, FALSE), (NULL, NULL, NULL, TRUE); + +-- represents PRESENT room statistics for a room +CREATE TABLE IF NOT EXISTS room_stats_current ( + room_id TEXT NOT NULL PRIMARY KEY, + + -- These starts cover the time from start_ts…end_ts (in seconds). + -- Note that end_ts is quantised, and start_ts usually so. + start_ts BIGINT, + end_ts BIGINT, + + current_state_events INT NOT NULL DEFAULT 0, + total_events INT NOT NULL DEFAULT 0, + joined_members INT NOT NULL DEFAULT 0, + invited_members INT NOT NULL DEFAULT 0, + left_members INT NOT NULL DEFAULT 0, + banned_members INT NOT NULL DEFAULT 0, + + -- If initial background count is still to be performed: NULL + -- If initial background count has been performed: the maximum delta stream + -- position that this row takes into account. + completed_delta_stream_id BIGINT, + + CONSTRAINT timestamp_nullity_equality CHECK ((start_ts IS NULL) = (end_ts IS NULL)) +); + +-- TODO check: make it easier to find dirty rows +-- TODO check we specify 'AND start_ts IS NOT NULL' in old collector, to take +-- advantage of optimisations. + + +-- TODO check: make it easier to find incomplete rows + + +-- represents HISTORICAL room statistics for a room +CREATE TABLE IF NOT EXISTS room_stats_historical ( + room_id TEXT NOT NULL, + -- These starts cover the time from (end_ts - bucket_size)…end_ts (in seconds). + -- Note that end_ts is quantised, and start_ts usually so. + end_ts BIGINT NOT NULL, + bucket_size INT NOT NULL, + PRIMARY KEY (room_id, end_ts), + + current_state_events INT NOT NULL, + total_events INT NOT NULL, + joined_members INT NOT NULL, + invited_members INT NOT NULL, + left_members INT NOT NULL, + banned_members INT NOT NULL +); + +-- We use this index to speed up deletion of old user stats. +CREATE INDEX IF NOT EXISTS room_stats_historical_end_ts ON room_stats_historical (end_ts); + +-- We don't need an index on (room_id, end_ts) because PRIMARY KEY sorts that +-- out for us. (We would want it to review stats for a particular room.) + + +-- represents PRESENT statistics for a user +CREATE TABLE IF NOT EXISTS user_stats_current ( + user_id TEXT NOT NULL PRIMARY KEY, + + -- The timestamp that represents the start of the + start_ts BIGINT, + end_ts BIGINT, + + public_rooms INT DEFAULT 0 NOT NULL, + private_rooms INT DEFAULT 0 NOT NULL, + + -- If initial background count is still to be performed: NULL + -- If initial background count has been performed: the maximum delta stream + -- position that this row takes into account. + completed_delta_stream_id BIGINT +); + +CREATE INDEX IF NOT EXISTS user_stats_current_dirty ON user_stats_current (start_ts) + WHERE start_ts IS NOT NULL; + +-- TODO check: make it easier to find incomplete rows +CREATE INDEX IF NOT EXISTS user_stats_not_complete ON user_stats_current (user_id) + WHERE completed_delta_stream_id IS NULL; + +-- represents HISTORICAL statistics for a user +CREATE TABLE IF NOT EXISTS user_stats_historical ( + user_id TEXT NOT NULL, + end_ts BIGINT NOT NULL, + bucket_size INT NOT NULL, + PRIMARY KEY (user_id, end_ts), + + public_rooms INT NOT NULL, + private_rooms INT NOT NULL +); + +-- We use this index to speed up deletion of old user stats. +CREATE INDEX IF NOT EXISTS user_stats_historical_end_ts ON user_stats_historical (end_ts); + +-- We don't need an index on (user_id, end_ts) because PRIMARY KEY sorts that +-- out for us. (We would want it to review stats for a particular user.) + +-- TODO old SQLites may not support partial indices + + +-- Set up staging tables +-- we depend on current_state_events_membership because this is used +-- in our counting. +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_stats_prepare', '{}', 'current_state_events_membership'); + +-- Run through each room and update stats +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_stats_process_rooms', '{}', 'populate_stats_prepare'); + +-- Run through each user and update stats. +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_stats_process_users', '{}', 'populate_stats_process_rooms'); + +-- Clean up staging tables +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_stats_cleanup', '{}', 'populate_stats_process_users'); diff --git a/synapse/storage/schema/delta/56/stats_separated2.py b/synapse/storage/schema/delta/56/stats_separated2.py new file mode 100644 index 0000000000..b92984a8cf --- /dev/null +++ b/synapse/storage/schema/delta/56/stats_separated2.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This schema delta will be run after 'stats_separated1.sql' due to lexicographic +# ordering. Note that it MUST be so. +from synapse.storage.engines import Sqlite3Engine, PostgresEngine + + +def _run_create_generic(stats_type, cursor, database_engine): + """ + Creates the pertinent (partial, if supported) indices for one kind of stats. + Args: + stats_type: "room" or "user" – the type of stats + cursor: Database Cursor + database_engine: Database Engine + """ + if isinstance(database_engine, Sqlite3Engine): + # even though SQLite >= 3.8 can support partial indices, we won't enable + # them, in case the SQLite database may be later used on another system. + # It's also the case that SQLite is only likely to be used in small + # deployments or testing, where the optimisations gained by use of a + # partial index are not a big concern. + cursor.execute( + """ + CREATE INDEX IF NOT EXISTS %s_stats_current_dirty + ON %s_stats_current (end_ts); + """ + % (stats_type, stats_type) + ) + cursor.execute( + """ + CREATE INDEX IF NOT EXISTS %s_stats_not_complete + ON %s_stats_current (completed_delta_stream_id, room_id); + """ + % (stats_type, stats_type) + ) + elif isinstance(database_engine, PostgresEngine): + # This partial index helps us with finding dirty stats rows + cursor.execute( + """ + CREATE INDEX IF NOT EXISTS %s_stats_current_dirty + ON %s_stats_current (end_ts) + WHERE end_ts IS NOT NULL; + """ + % (stats_type, stats_type) + ) + # This partial index helps us with old collection + cursor.execute( + """ + CREATE INDEX IF NOT EXISTS %s_stats_not_complete + ON %s_stats_current (room_id) + WHERE completed_delta_stream_id IS NULL; + """ + % (stats_type, stats_type) + ) + else: + raise NotImplementedError("Unknown database engine.") + + +def run_create(cursor, database_engine): + """ + This function is called as part of the schema delta. + It will create indices – partial, if supported – for the new 'separated' + room & user statistics. + """ + _run_create_generic("room", cursor, database_engine) + _run_create_generic("user", cursor, database_engine) + + +def run_upgrade(cur, database_engine, config): + """ + This function is run on a database upgrade (of a non-empty database). + We have no need to do anything specific here. + """ + pass -- cgit 1.5.1