diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py
index c67fe69a50..f20d810ece 100644
--- a/synapse/app/event_creator.py
+++ b/synapse/app/event_creator.py
@@ -56,8 +56,8 @@ from synapse.rest.client.v1.room import (
RoomStateEventRestServlet,
)
from synapse.server import HomeServer
+from synapse.storage.data_stores.main.user_directory import UserDirectoryStore
from synapse.storage.engines import create_engine
-from synapse.storage.user_directory import UserDirectoryStore
from synapse.util.httpresourcetree import create_resource_tree
from synapse.util.manhole import manhole
from synapse.util.versionstring import get_version_string
diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py
index 2ac783ffa3..6bc7202f33 100644
--- a/synapse/app/media_repository.py
+++ b/synapse/app/media_repository.py
@@ -39,8 +39,8 @@ from synapse.replication.tcp.client import ReplicationClientHandler
from synapse.rest.admin import register_servlets_for_media_repo
from synapse.rest.media.v0.content_repository import ContentRepoResource
from synapse.server import HomeServer
+from synapse.storage.data_stores.main.media_repository import MediaRepositoryStore
from synapse.storage.engines import create_engine
-from synapse.storage.media_repository import MediaRepositoryStore
from synapse.util.httpresourcetree import create_resource_tree
from synapse.util.manhole import manhole
from synapse.util.versionstring import get_version_string
diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py
index 473026fce5..6a7e2fa707 100644
--- a/synapse/app/synchrotron.py
+++ b/synapse/app/synchrotron.py
@@ -54,8 +54,8 @@ from synapse.rest.client.v1.initial_sync import InitialSyncRestServlet
from synapse.rest.client.v1.room import RoomInitialSyncRestServlet
from synapse.rest.client.v2_alpha import sync
from synapse.server import HomeServer
+from synapse.storage.data_stores.main.presence import UserPresenceState
from synapse.storage.engines import create_engine
-from synapse.storage.presence import UserPresenceState
from synapse.util.httpresourcetree import create_resource_tree
from synapse.util.manhole import manhole
from synapse.util.stringutils import random_string
diff --git a/synapse/app/user_dir.py b/synapse/app/user_dir.py
index e01afb39f2..a5d6dc7915 100644
--- a/synapse/app/user_dir.py
+++ b/synapse/app/user_dir.py
@@ -42,8 +42,8 @@ from synapse.replication.tcp.streams.events import (
)
from synapse.rest.client.v2_alpha import user_directory
from synapse.server import HomeServer
+from synapse.storage.data_stores.main.user_directory import UserDirectoryStore
from synapse.storage.engines import create_engine
-from synapse.storage.user_directory import UserDirectoryStore
from synapse.util.caches.stream_change_cache import StreamChangeCache
from synapse.util.httpresourcetree import create_resource_tree
from synapse.util.manhole import manhole
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index fad980b893..cc75c39476 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -30,7 +30,7 @@ from synapse.federation.units import Edu
from synapse.handlers.presence import format_user_presence_state
from synapse.metrics import sent_transactions_counter
from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.storage import UserPresenceState
+from synapse.storage.presence import UserPresenceState
from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter
# This is defined in the Matrix spec and enforced by the receiver.
diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py
index 3c44d1d48d..bc2f6a12ae 100644
--- a/synapse/replication/slave/storage/account_data.py
+++ b/synapse/replication/slave/storage/account_data.py
@@ -16,8 +16,8 @@
from synapse.replication.slave.storage._base import BaseSlavedStore
from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
-from synapse.storage.account_data import AccountDataWorkerStore
-from synapse.storage.tags import TagsWorkerStore
+from synapse.storage.data_stores.main.account_data import AccountDataWorkerStore
+from synapse.storage.data_stores.main.tags import TagsWorkerStore
class SlavedAccountDataStore(TagsWorkerStore, AccountDataWorkerStore, BaseSlavedStore):
diff --git a/synapse/replication/slave/storage/appservice.py b/synapse/replication/slave/storage/appservice.py
index cda12ea70d..a67fbeffb7 100644
--- a/synapse/replication/slave/storage/appservice.py
+++ b/synapse/replication/slave/storage/appservice.py
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from synapse.storage.appservice import (
+from synapse.storage.data_stores.main.appservice import (
ApplicationServiceTransactionWorkerStore,
ApplicationServiceWorkerStore,
)
diff --git a/synapse/replication/slave/storage/client_ips.py b/synapse/replication/slave/storage/client_ips.py
index 14ced32333..b4f58cea19 100644
--- a/synapse/replication/slave/storage/client_ips.py
+++ b/synapse/replication/slave/storage/client_ips.py
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from synapse.storage.client_ips import LAST_SEEN_GRANULARITY
+from synapse.storage.data_stores.main.client_ips import LAST_SEEN_GRANULARITY
from synapse.util.caches import CACHE_SIZE_FACTOR
from synapse.util.caches.descriptors import Cache
diff --git a/synapse/replication/slave/storage/deviceinbox.py b/synapse/replication/slave/storage/deviceinbox.py
index 284fd30d89..9fb6c5c6ff 100644
--- a/synapse/replication/slave/storage/deviceinbox.py
+++ b/synapse/replication/slave/storage/deviceinbox.py
@@ -15,7 +15,7 @@
from synapse.replication.slave.storage._base import BaseSlavedStore
from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
-from synapse.storage.deviceinbox import DeviceInboxWorkerStore
+from synapse.storage.data_stores.main.deviceinbox import DeviceInboxWorkerStore
from synapse.util.caches.expiringcache import ExpiringCache
from synapse.util.caches.stream_change_cache import StreamChangeCache
diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py
index d9300fce33..f856c72d84 100644
--- a/synapse/replication/slave/storage/devices.py
+++ b/synapse/replication/slave/storage/devices.py
@@ -15,8 +15,8 @@
from synapse.replication.slave.storage._base import BaseSlavedStore
from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
-from synapse.storage.devices import DeviceWorkerStore
-from synapse.storage.end_to_end_keys import EndToEndKeyWorkerStore
+from synapse.storage.data_stores.main.devices import DeviceWorkerStore
+from synapse.storage.data_stores.main.end_to_end_keys import EndToEndKeyWorkerStore
from synapse.util.caches.stream_change_cache import StreamChangeCache
diff --git a/synapse/replication/slave/storage/directory.py b/synapse/replication/slave/storage/directory.py
index 1d1d48709a..8b9717c46f 100644
--- a/synapse/replication/slave/storage/directory.py
+++ b/synapse/replication/slave/storage/directory.py
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from synapse.storage.directory import DirectoryWorkerStore
+from synapse.storage.data_stores.main.directory import DirectoryWorkerStore
from ._base import BaseSlavedStore
diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py
index ab5937e638..d0a0eaf75b 100644
--- a/synapse/replication/slave/storage/events.py
+++ b/synapse/replication/slave/storage/events.py
@@ -20,15 +20,17 @@ from synapse.replication.tcp.streams.events import (
EventsStreamCurrentStateRow,
EventsStreamEventRow,
)
-from synapse.storage.event_federation import EventFederationWorkerStore
-from synapse.storage.event_push_actions import EventPushActionsWorkerStore
-from synapse.storage.events_worker import EventsWorkerStore
-from synapse.storage.relations import RelationsWorkerStore
-from synapse.storage.roommember import RoomMemberWorkerStore
-from synapse.storage.signatures import SignatureWorkerStore
-from synapse.storage.state import StateGroupWorkerStore
-from synapse.storage.stream import StreamWorkerStore
-from synapse.storage.user_erasure_store import UserErasureWorkerStore
+from synapse.storage.data_stores.main.event_federation import EventFederationWorkerStore
+from synapse.storage.data_stores.main.event_push_actions import (
+ EventPushActionsWorkerStore,
+)
+from synapse.storage.data_stores.main.events_worker import EventsWorkerStore
+from synapse.storage.data_stores.main.relations import RelationsWorkerStore
+from synapse.storage.data_stores.main.roommember import RoomMemberWorkerStore
+from synapse.storage.data_stores.main.signatures import SignatureWorkerStore
+from synapse.storage.data_stores.main.state import StateGroupWorkerStore
+from synapse.storage.data_stores.main.stream import StreamWorkerStore
+from synapse.storage.data_stores.main.user_erasure_store import UserErasureWorkerStore
from ._base import BaseSlavedStore
from ._slaved_id_tracker import SlavedIdTracker
diff --git a/synapse/replication/slave/storage/filtering.py b/synapse/replication/slave/storage/filtering.py
index 456a14cd5c..5c84ebd125 100644
--- a/synapse/replication/slave/storage/filtering.py
+++ b/synapse/replication/slave/storage/filtering.py
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from synapse.storage.filtering import FilteringStore
+from synapse.storage.data_stores.main.filtering import FilteringStore
from ._base import BaseSlavedStore
diff --git a/synapse/replication/slave/storage/keys.py b/synapse/replication/slave/storage/keys.py
index cc6f7f009f..3def367ae9 100644
--- a/synapse/replication/slave/storage/keys.py
+++ b/synapse/replication/slave/storage/keys.py
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from synapse.storage import KeyStore
+from synapse.storage.data_stores.main.keys import KeyStore
# KeyStore isn't really safe to use from a worker, but for now we do so and hope that
# the races it creates aren't too bad.
diff --git a/synapse/replication/slave/storage/presence.py b/synapse/replication/slave/storage/presence.py
index 82d808af4c..747ced0c84 100644
--- a/synapse/replication/slave/storage/presence.py
+++ b/synapse/replication/slave/storage/presence.py
@@ -14,7 +14,7 @@
# limitations under the License.
from synapse.storage import DataStore
-from synapse.storage.presence import PresenceStore
+from synapse.storage.data_stores.main.presence import PresenceStore
from synapse.util.caches.stream_change_cache import StreamChangeCache
from ._base import BaseSlavedStore, __func__
diff --git a/synapse/replication/slave/storage/profile.py b/synapse/replication/slave/storage/profile.py
index 46c28d4171..28c508aad3 100644
--- a/synapse/replication/slave/storage/profile.py
+++ b/synapse/replication/slave/storage/profile.py
@@ -14,7 +14,7 @@
# limitations under the License.
from synapse.replication.slave.storage._base import BaseSlavedStore
-from synapse.storage.profile import ProfileWorkerStore
+from synapse.storage.data_stores.main.profile import ProfileWorkerStore
class SlavedProfileStore(ProfileWorkerStore, BaseSlavedStore):
diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py
index af7012702e..3655f05e54 100644
--- a/synapse/replication/slave/storage/push_rule.py
+++ b/synapse/replication/slave/storage/push_rule.py
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from synapse.storage.push_rule import PushRulesWorkerStore
+from synapse.storage.data_stores.main.push_rule import PushRulesWorkerStore
from ._slaved_id_tracker import SlavedIdTracker
from .events import SlavedEventStore
diff --git a/synapse/replication/slave/storage/pushers.py b/synapse/replication/slave/storage/pushers.py
index 8eeb267d61..b4331d0799 100644
--- a/synapse/replication/slave/storage/pushers.py
+++ b/synapse/replication/slave/storage/pushers.py
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from synapse.storage.pusher import PusherWorkerStore
+from synapse.storage.data_stores.main.pusher import PusherWorkerStore
from ._base import BaseSlavedStore
from ._slaved_id_tracker import SlavedIdTracker
diff --git a/synapse/replication/slave/storage/receipts.py b/synapse/replication/slave/storage/receipts.py
index 91afa5a72b..43d823c601 100644
--- a/synapse/replication/slave/storage/receipts.py
+++ b/synapse/replication/slave/storage/receipts.py
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from synapse.storage.receipts import ReceiptsWorkerStore
+from synapse.storage.data_stores.main.receipts import ReceiptsWorkerStore
from ._base import BaseSlavedStore
from ._slaved_id_tracker import SlavedIdTracker
diff --git a/synapse/replication/slave/storage/registration.py b/synapse/replication/slave/storage/registration.py
index 408d91df1c..4b8553e250 100644
--- a/synapse/replication/slave/storage/registration.py
+++ b/synapse/replication/slave/storage/registration.py
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from synapse.storage.registration import RegistrationWorkerStore
+from synapse.storage.data_stores.main.registration import RegistrationWorkerStore
from ._base import BaseSlavedStore
diff --git a/synapse/replication/slave/storage/room.py b/synapse/replication/slave/storage/room.py
index f68b3378e3..d9ad386b28 100644
--- a/synapse/replication/slave/storage/room.py
+++ b/synapse/replication/slave/storage/room.py
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from synapse.storage.room import RoomWorkerStore
+from synapse.storage.data_stores.main.room import RoomWorkerStore
from ._base import BaseSlavedStore
from ._slaved_id_tracker import SlavedIdTracker
diff --git a/synapse/replication/slave/storage/transactions.py b/synapse/replication/slave/storage/transactions.py
index 3527beb3c9..ac88e6b8c3 100644
--- a/synapse/replication/slave/storage/transactions.py
+++ b/synapse/replication/slave/storage/transactions.py
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from synapse.storage.transactions import TransactionStore
+from synapse.storage.data_stores.main.transactions import TransactionStore
from ._base import BaseSlavedStore
diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py
index e7f6ea7286..e42fba45a1 100644
--- a/synapse/storage/__init__.py
+++ b/synapse/storage/__init__.py
@@ -14,509 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import calendar
-import logging
-import time
-
-from twisted.internet import defer
-
-from synapse.api.constants import PresenceState
-from synapse.storage.devices import DeviceStore
-from synapse.storage.user_erasure_store import UserErasureStore
-from synapse.util.caches.stream_change_cache import StreamChangeCache
-
-from .account_data import AccountDataStore
-from .appservice import ApplicationServiceStore, ApplicationServiceTransactionStore
-from .client_ips import ClientIpStore
-from .deviceinbox import DeviceInboxStore
-from .directory import DirectoryStore
-from .e2e_room_keys import EndToEndRoomKeyStore
-from .end_to_end_keys import EndToEndKeyStore
-from .engines import PostgresEngine
-from .event_federation import EventFederationStore
-from .event_push_actions import EventPushActionsStore
-from .events import EventsStore
-from .events_bg_updates import EventsBackgroundUpdatesStore
-from .filtering import FilteringStore
-from .group_server import GroupServerStore
-from .keys import KeyStore
-from .media_repository import MediaRepositoryStore
-from .monthly_active_users import MonthlyActiveUsersStore
-from .openid import OpenIdStore
-from .presence import PresenceStore, UserPresenceState
-from .profile import ProfileStore
-from .push_rule import PushRuleStore
-from .pusher import PusherStore
-from .receipts import ReceiptsStore
-from .registration import RegistrationStore
-from .rejections import RejectionsStore
-from .relations import RelationsStore
-from .room import RoomStore
-from .roommember import RoomMemberStore
-from .search import SearchStore
-from .signatures import SignatureStore
-from .state import StateStore
-from .stats import StatsStore
-from .stream import StreamStore
-from .tags import TagsStore
-from .transactions import TransactionStore
-from .user_directory import UserDirectoryStore
-from .util.id_generators import ChainedIdGenerator, IdGenerator, StreamIdGenerator
-
-logger = logging.getLogger(__name__)
-
-
-class DataStore(
- EventsBackgroundUpdatesStore,
- RoomMemberStore,
- RoomStore,
- RegistrationStore,
- StreamStore,
- ProfileStore,
- PresenceStore,
- TransactionStore,
- DirectoryStore,
- KeyStore,
- StateStore,
- SignatureStore,
- ApplicationServiceStore,
- EventsStore,
- EventFederationStore,
- MediaRepositoryStore,
- RejectionsStore,
- FilteringStore,
- PusherStore,
- PushRuleStore,
- ApplicationServiceTransactionStore,
- ReceiptsStore,
- EndToEndKeyStore,
- EndToEndRoomKeyStore,
- SearchStore,
- TagsStore,
- AccountDataStore,
- EventPushActionsStore,
- OpenIdStore,
- ClientIpStore,
- DeviceStore,
- DeviceInboxStore,
- UserDirectoryStore,
- GroupServerStore,
- UserErasureStore,
- MonthlyActiveUsersStore,
- StatsStore,
- RelationsStore,
-):
- def __init__(self, db_conn, hs):
- self.hs = hs
- self._clock = hs.get_clock()
- self.database_engine = hs.database_engine
-
- self._stream_id_gen = StreamIdGenerator(
- db_conn,
- "events",
- "stream_ordering",
- extra_tables=[("local_invites", "stream_id")],
- )
- self._backfill_id_gen = StreamIdGenerator(
- db_conn,
- "events",
- "stream_ordering",
- step=-1,
- extra_tables=[("ex_outlier_stream", "event_stream_ordering")],
- )
- self._presence_id_gen = StreamIdGenerator(
- db_conn, "presence_stream", "stream_id"
- )
- self._device_inbox_id_gen = StreamIdGenerator(
- db_conn, "device_max_stream_id", "stream_id"
- )
- self._public_room_id_gen = StreamIdGenerator(
- db_conn, "public_room_list_stream", "stream_id"
- )
- self._device_list_id_gen = StreamIdGenerator(
- db_conn, "device_lists_stream", "stream_id"
- )
-
- self._access_tokens_id_gen = IdGenerator(db_conn, "access_tokens", "id")
- self._event_reports_id_gen = IdGenerator(db_conn, "event_reports", "id")
- self._push_rule_id_gen = IdGenerator(db_conn, "push_rules", "id")
- self._push_rules_enable_id_gen = IdGenerator(db_conn, "push_rules_enable", "id")
- self._push_rules_stream_id_gen = ChainedIdGenerator(
- self._stream_id_gen, db_conn, "push_rules_stream", "stream_id"
- )
- self._pushers_id_gen = StreamIdGenerator(
- db_conn, "pushers", "id", extra_tables=[("deleted_pushers", "stream_id")]
- )
- self._group_updates_id_gen = StreamIdGenerator(
- db_conn, "local_group_updates", "stream_id"
- )
-
- if isinstance(self.database_engine, PostgresEngine):
- self._cache_id_gen = StreamIdGenerator(
- db_conn, "cache_invalidation_stream", "stream_id"
- )
- else:
- self._cache_id_gen = None
-
- self._presence_on_startup = self._get_active_presence(db_conn)
-
- presence_cache_prefill, min_presence_val = self._get_cache_dict(
- db_conn,
- "presence_stream",
- entity_column="user_id",
- stream_column="stream_id",
- max_value=self._presence_id_gen.get_current_token(),
- )
- self.presence_stream_cache = StreamChangeCache(
- "PresenceStreamChangeCache",
- min_presence_val,
- prefilled_cache=presence_cache_prefill,
- )
-
- max_device_inbox_id = self._device_inbox_id_gen.get_current_token()
- device_inbox_prefill, min_device_inbox_id = self._get_cache_dict(
- db_conn,
- "device_inbox",
- entity_column="user_id",
- stream_column="stream_id",
- max_value=max_device_inbox_id,
- limit=1000,
- )
- self._device_inbox_stream_cache = StreamChangeCache(
- "DeviceInboxStreamChangeCache",
- min_device_inbox_id,
- prefilled_cache=device_inbox_prefill,
- )
- # The federation outbox and the local device inbox uses the same
- # stream_id generator.
- device_outbox_prefill, min_device_outbox_id = self._get_cache_dict(
- db_conn,
- "device_federation_outbox",
- entity_column="destination",
- stream_column="stream_id",
- max_value=max_device_inbox_id,
- limit=1000,
- )
- self._device_federation_outbox_stream_cache = StreamChangeCache(
- "DeviceFederationOutboxStreamChangeCache",
- min_device_outbox_id,
- prefilled_cache=device_outbox_prefill,
- )
-
- device_list_max = self._device_list_id_gen.get_current_token()
- self._device_list_stream_cache = StreamChangeCache(
- "DeviceListStreamChangeCache", device_list_max
- )
- self._device_list_federation_stream_cache = StreamChangeCache(
- "DeviceListFederationStreamChangeCache", device_list_max
- )
-
- events_max = self._stream_id_gen.get_current_token()
- curr_state_delta_prefill, min_curr_state_delta_id = self._get_cache_dict(
- db_conn,
- "current_state_delta_stream",
- entity_column="room_id",
- stream_column="stream_id",
- max_value=events_max, # As we share the stream id with events token
- limit=1000,
- )
- self._curr_state_delta_stream_cache = StreamChangeCache(
- "_curr_state_delta_stream_cache",
- min_curr_state_delta_id,
- prefilled_cache=curr_state_delta_prefill,
- )
-
- _group_updates_prefill, min_group_updates_id = self._get_cache_dict(
- db_conn,
- "local_group_updates",
- entity_column="user_id",
- stream_column="stream_id",
- max_value=self._group_updates_id_gen.get_current_token(),
- limit=1000,
- )
- self._group_updates_stream_cache = StreamChangeCache(
- "_group_updates_stream_cache",
- min_group_updates_id,
- prefilled_cache=_group_updates_prefill,
- )
-
- self._stream_order_on_start = self.get_room_max_stream_ordering()
- self._min_stream_order_on_start = self.get_room_min_stream_ordering()
-
- # Used in _generate_user_daily_visits to keep track of progress
- self._last_user_visit_update = self._get_start_of_day()
-
- super(DataStore, self).__init__(db_conn, hs)
-
- def take_presence_startup_info(self):
- active_on_startup = self._presence_on_startup
- self._presence_on_startup = None
- return active_on_startup
-
- def _get_active_presence(self, db_conn):
- """Fetch non-offline presence from the database so that we can register
- the appropriate time outs.
- """
-
- sql = (
- "SELECT user_id, state, last_active_ts, last_federation_update_ts,"
- " last_user_sync_ts, status_msg, currently_active FROM presence_stream"
- " WHERE state != ?"
- )
- sql = self.database_engine.convert_param_style(sql)
-
- txn = db_conn.cursor()
- txn.execute(sql, (PresenceState.OFFLINE,))
- rows = self.cursor_to_dict(txn)
- txn.close()
-
- for row in rows:
- row["currently_active"] = bool(row["currently_active"])
-
- return [UserPresenceState(**row) for row in rows]
-
- def count_daily_users(self):
- """
- Counts the number of users who used this homeserver in the last 24 hours.
- """
- yesterday = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24)
- return self.runInteraction("count_daily_users", self._count_users, yesterday)
-
- def count_monthly_users(self):
- """
- Counts the number of users who used this homeserver in the last 30 days.
- Note this method is intended for phonehome metrics only and is different
- from the mau figure in synapse.storage.monthly_active_users which,
- amongst other things, includes a 3 day grace period before a user counts.
- """
- thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30)
- return self.runInteraction(
- "count_monthly_users", self._count_users, thirty_days_ago
- )
-
- def _count_users(self, txn, time_from):
- """
- Returns number of users seen in the past time_from period
- """
- sql = """
- SELECT COALESCE(count(*), 0) FROM (
- SELECT user_id FROM user_ips
- WHERE last_seen > ?
- GROUP BY user_id
- ) u
- """
- txn.execute(sql, (time_from,))
- count, = txn.fetchone()
- return count
-
- def count_r30_users(self):
- """
- Counts the number of 30 day retained users, defined as:-
- * Users who have created their accounts more than 30 days ago
- * Where last seen at most 30 days ago
- * Where account creation and last_seen are > 30 days apart
-
- Returns counts globaly for a given user as well as breaking
- by platform
- """
-
- def _count_r30_users(txn):
- thirty_days_in_secs = 86400 * 30
- now = int(self._clock.time())
- thirty_days_ago_in_secs = now - thirty_days_in_secs
-
- sql = """
- SELECT platform, COALESCE(count(*), 0) FROM (
- SELECT
- users.name, platform, users.creation_ts * 1000,
- MAX(uip.last_seen)
- FROM users
- INNER JOIN (
- SELECT
- user_id,
- last_seen,
- CASE
- WHEN user_agent LIKE '%%Android%%' THEN 'android'
- WHEN user_agent LIKE '%%iOS%%' THEN 'ios'
- WHEN user_agent LIKE '%%Electron%%' THEN 'electron'
- WHEN user_agent LIKE '%%Mozilla%%' THEN 'web'
- WHEN user_agent LIKE '%%Gecko%%' THEN 'web'
- ELSE 'unknown'
- END
- AS platform
- FROM user_ips
- ) uip
- ON users.name = uip.user_id
- AND users.appservice_id is NULL
- AND users.creation_ts < ?
- AND uip.last_seen/1000 > ?
- AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
- GROUP BY users.name, platform, users.creation_ts
- ) u GROUP BY platform
- """
-
- results = {}
- txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs))
-
- for row in txn:
- if row[0] == "unknown":
- pass
- results[row[0]] = row[1]
-
- sql = """
- SELECT COALESCE(count(*), 0) FROM (
- SELECT users.name, users.creation_ts * 1000,
- MAX(uip.last_seen)
- FROM users
- INNER JOIN (
- SELECT
- user_id,
- last_seen
- FROM user_ips
- ) uip
- ON users.name = uip.user_id
- AND appservice_id is NULL
- AND users.creation_ts < ?
- AND uip.last_seen/1000 > ?
- AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
- GROUP BY users.name, users.creation_ts
- ) u
- """
-
- txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs))
-
- count, = txn.fetchone()
- results["all"] = count
-
- return results
-
- return self.runInteraction("count_r30_users", _count_r30_users)
-
- def _get_start_of_day(self):
- """
- Returns millisecond unixtime for start of UTC day.
- """
- now = time.gmtime()
- today_start = calendar.timegm((now.tm_year, now.tm_mon, now.tm_mday, 0, 0, 0))
- return today_start * 1000
-
- def generate_user_daily_visits(self):
- """
- Generates daily visit data for use in cohort/ retention analysis
- """
-
- def _generate_user_daily_visits(txn):
- logger.info("Calling _generate_user_daily_visits")
- today_start = self._get_start_of_day()
- a_day_in_milliseconds = 24 * 60 * 60 * 1000
- now = self.clock.time_msec()
-
- sql = """
- INSERT INTO user_daily_visits (user_id, device_id, timestamp)
- SELECT u.user_id, u.device_id, ?
- FROM user_ips AS u
- LEFT JOIN (
- SELECT user_id, device_id, timestamp FROM user_daily_visits
- WHERE timestamp = ?
- ) udv
- ON u.user_id = udv.user_id AND u.device_id=udv.device_id
- INNER JOIN users ON users.name=u.user_id
- WHERE last_seen > ? AND last_seen <= ?
- AND udv.timestamp IS NULL AND users.is_guest=0
- AND users.appservice_id IS NULL
- GROUP BY u.user_id, u.device_id
- """
-
- # This means that the day has rolled over but there could still
- # be entries from the previous day. There is an edge case
- # where if the user logs in at 23:59 and overwrites their
- # last_seen at 00:01 then they will not be counted in the
- # previous day's stats - it is important that the query is run
- # often to minimise this case.
- if today_start > self._last_user_visit_update:
- yesterday_start = today_start - a_day_in_milliseconds
- txn.execute(
- sql,
- (
- yesterday_start,
- yesterday_start,
- self._last_user_visit_update,
- today_start,
- ),
- )
- self._last_user_visit_update = today_start
-
- txn.execute(
- sql, (today_start, today_start, self._last_user_visit_update, now)
- )
- # Update _last_user_visit_update to now. The reason to do this
- # rather just clamping to the beginning of the day is to limit
- # the size of the join - meaning that the query can be run more
- # frequently
- self._last_user_visit_update = now
-
- return self.runInteraction(
- "generate_user_daily_visits", _generate_user_daily_visits
- )
-
- def get_users(self):
- """Function to reterive a list of users in users table.
-
- Args:
- Returns:
- defer.Deferred: resolves to list[dict[str, Any]]
- """
- return self._simple_select_list(
- table="users",
- keyvalues={},
- retcols=["name", "password_hash", "is_guest", "admin", "user_type"],
- desc="get_users",
- )
-
- @defer.inlineCallbacks
- def get_users_paginate(self, order, start, limit):
- """Function to reterive a paginated list of users from
- users list. This will return a json object, which contains
- list of users and the total number of users in users table.
-
- Args:
- order (str): column name to order the select by this column
- start (int): start number to begin the query from
- limit (int): number of rows to reterive
- Returns:
- defer.Deferred: resolves to json object {list[dict[str, Any]], count}
- """
- users = yield self.runInteraction(
- "get_users_paginate",
- self._simple_select_list_paginate_txn,
- table="users",
- keyvalues={"is_guest": False},
- orderby=order,
- start=start,
- limit=limit,
- retcols=["name", "password_hash", "is_guest", "admin", "user_type"],
- )
- count = yield self.runInteraction("get_users_paginate", self.get_user_count_txn)
- retval = {"users": users, "total": count}
- return retval
-
- def search_users(self, term):
- """Function to search users list for one or more users with
- the matched term.
-
- Args:
- term (str): search term
- col (str): column to query term should be matched to
- Returns:
- defer.Deferred: resolves to list[dict[str, Any]]
- """
- return self._simple_search_list(
- table="users",
- term=term,
- col="name",
- retcols=["name", "password_hash", "is_guest", "admin", "user_type"],
- desc="search_users",
- )
+from synapse.storage.data_stores.main import DataStore # noqa: F401
def are_all_users_on_domain(txn, database_engine, domain):
diff --git a/synapse/storage/data_stores/__init__.py b/synapse/storage/data_stores/__init__.py
new file mode 100644
index 0000000000..56094078ed
--- /dev/null
+++ b/synapse/storage/data_stores/__init__.py
@@ -0,0 +1,14 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/synapse/storage/data_stores/main/__init__.py b/synapse/storage/data_stores/main/__init__.py
new file mode 100644
index 0000000000..d29135588f
--- /dev/null
+++ b/synapse/storage/data_stores/main/__init__.py
@@ -0,0 +1,524 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2018 New Vector Ltd
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import calendar
+import logging
+import time
+
+from twisted.internet import defer
+
+from synapse.api.constants import PresenceState
+from synapse.storage.engines import PostgresEngine
+from synapse.storage.util.id_generators import (
+ ChainedIdGenerator,
+ IdGenerator,
+ StreamIdGenerator,
+)
+from synapse.util.caches.stream_change_cache import StreamChangeCache
+
+from .account_data import AccountDataStore
+from .appservice import ApplicationServiceStore, ApplicationServiceTransactionStore
+from .client_ips import ClientIpStore
+from .deviceinbox import DeviceInboxStore
+from .devices import DeviceStore
+from .directory import DirectoryStore
+from .e2e_room_keys import EndToEndRoomKeyStore
+from .end_to_end_keys import EndToEndKeyStore
+from .event_federation import EventFederationStore
+from .event_push_actions import EventPushActionsStore
+from .events import EventsStore
+from .events_bg_updates import EventsBackgroundUpdatesStore
+from .filtering import FilteringStore
+from .group_server import GroupServerStore
+from .keys import KeyStore
+from .media_repository import MediaRepositoryStore
+from .monthly_active_users import MonthlyActiveUsersStore
+from .openid import OpenIdStore
+from .presence import PresenceStore, UserPresenceState
+from .profile import ProfileStore
+from .push_rule import PushRuleStore
+from .pusher import PusherStore
+from .receipts import ReceiptsStore
+from .registration import RegistrationStore
+from .rejections import RejectionsStore
+from .relations import RelationsStore
+from .room import RoomStore
+from .roommember import RoomMemberStore
+from .search import SearchStore
+from .signatures import SignatureStore
+from .state import StateStore
+from .stats import StatsStore
+from .stream import StreamStore
+from .tags import TagsStore
+from .transactions import TransactionStore
+from .user_directory import UserDirectoryStore
+from .user_erasure_store import UserErasureStore
+
+logger = logging.getLogger(__name__)
+
+
+class DataStore(
+ EventsBackgroundUpdatesStore,
+ RoomMemberStore,
+ RoomStore,
+ RegistrationStore,
+ StreamStore,
+ ProfileStore,
+ PresenceStore,
+ TransactionStore,
+ DirectoryStore,
+ KeyStore,
+ StateStore,
+ SignatureStore,
+ ApplicationServiceStore,
+ EventsStore,
+ EventFederationStore,
+ MediaRepositoryStore,
+ RejectionsStore,
+ FilteringStore,
+ PusherStore,
+ PushRuleStore,
+ ApplicationServiceTransactionStore,
+ ReceiptsStore,
+ EndToEndKeyStore,
+ EndToEndRoomKeyStore,
+ SearchStore,
+ TagsStore,
+ AccountDataStore,
+ EventPushActionsStore,
+ OpenIdStore,
+ ClientIpStore,
+ DeviceStore,
+ DeviceInboxStore,
+ UserDirectoryStore,
+ GroupServerStore,
+ UserErasureStore,
+ MonthlyActiveUsersStore,
+ StatsStore,
+ RelationsStore,
+):
+ def __init__(self, db_conn, hs):
+ self.hs = hs
+ self._clock = hs.get_clock()
+ self.database_engine = hs.database_engine
+
+ self._stream_id_gen = StreamIdGenerator(
+ db_conn,
+ "events",
+ "stream_ordering",
+ extra_tables=[("local_invites", "stream_id")],
+ )
+ self._backfill_id_gen = StreamIdGenerator(
+ db_conn,
+ "events",
+ "stream_ordering",
+ step=-1,
+ extra_tables=[("ex_outlier_stream", "event_stream_ordering")],
+ )
+ self._presence_id_gen = StreamIdGenerator(
+ db_conn, "presence_stream", "stream_id"
+ )
+ self._device_inbox_id_gen = StreamIdGenerator(
+ db_conn, "device_max_stream_id", "stream_id"
+ )
+ self._public_room_id_gen = StreamIdGenerator(
+ db_conn, "public_room_list_stream", "stream_id"
+ )
+ self._device_list_id_gen = StreamIdGenerator(
+ db_conn, "device_lists_stream", "stream_id"
+ )
+
+ self._access_tokens_id_gen = IdGenerator(db_conn, "access_tokens", "id")
+ self._event_reports_id_gen = IdGenerator(db_conn, "event_reports", "id")
+ self._push_rule_id_gen = IdGenerator(db_conn, "push_rules", "id")
+ self._push_rules_enable_id_gen = IdGenerator(db_conn, "push_rules_enable", "id")
+ self._push_rules_stream_id_gen = ChainedIdGenerator(
+ self._stream_id_gen, db_conn, "push_rules_stream", "stream_id"
+ )
+ self._pushers_id_gen = StreamIdGenerator(
+ db_conn, "pushers", "id", extra_tables=[("deleted_pushers", "stream_id")]
+ )
+ self._group_updates_id_gen = StreamIdGenerator(
+ db_conn, "local_group_updates", "stream_id"
+ )
+
+ if isinstance(self.database_engine, PostgresEngine):
+ self._cache_id_gen = StreamIdGenerator(
+ db_conn, "cache_invalidation_stream", "stream_id"
+ )
+ else:
+ self._cache_id_gen = None
+
+ self._presence_on_startup = self._get_active_presence(db_conn)
+
+ presence_cache_prefill, min_presence_val = self._get_cache_dict(
+ db_conn,
+ "presence_stream",
+ entity_column="user_id",
+ stream_column="stream_id",
+ max_value=self._presence_id_gen.get_current_token(),
+ )
+ self.presence_stream_cache = StreamChangeCache(
+ "PresenceStreamChangeCache",
+ min_presence_val,
+ prefilled_cache=presence_cache_prefill,
+ )
+
+ max_device_inbox_id = self._device_inbox_id_gen.get_current_token()
+ device_inbox_prefill, min_device_inbox_id = self._get_cache_dict(
+ db_conn,
+ "device_inbox",
+ entity_column="user_id",
+ stream_column="stream_id",
+ max_value=max_device_inbox_id,
+ limit=1000,
+ )
+ self._device_inbox_stream_cache = StreamChangeCache(
+ "DeviceInboxStreamChangeCache",
+ min_device_inbox_id,
+ prefilled_cache=device_inbox_prefill,
+ )
+ # The federation outbox and the local device inbox uses the same
+ # stream_id generator.
+ device_outbox_prefill, min_device_outbox_id = self._get_cache_dict(
+ db_conn,
+ "device_federation_outbox",
+ entity_column="destination",
+ stream_column="stream_id",
+ max_value=max_device_inbox_id,
+ limit=1000,
+ )
+ self._device_federation_outbox_stream_cache = StreamChangeCache(
+ "DeviceFederationOutboxStreamChangeCache",
+ min_device_outbox_id,
+ prefilled_cache=device_outbox_prefill,
+ )
+
+ device_list_max = self._device_list_id_gen.get_current_token()
+ self._device_list_stream_cache = StreamChangeCache(
+ "DeviceListStreamChangeCache", device_list_max
+ )
+ self._device_list_federation_stream_cache = StreamChangeCache(
+ "DeviceListFederationStreamChangeCache", device_list_max
+ )
+
+ events_max = self._stream_id_gen.get_current_token()
+ curr_state_delta_prefill, min_curr_state_delta_id = self._get_cache_dict(
+ db_conn,
+ "current_state_delta_stream",
+ entity_column="room_id",
+ stream_column="stream_id",
+ max_value=events_max, # As we share the stream id with events token
+ limit=1000,
+ )
+ self._curr_state_delta_stream_cache = StreamChangeCache(
+ "_curr_state_delta_stream_cache",
+ min_curr_state_delta_id,
+ prefilled_cache=curr_state_delta_prefill,
+ )
+
+ _group_updates_prefill, min_group_updates_id = self._get_cache_dict(
+ db_conn,
+ "local_group_updates",
+ entity_column="user_id",
+ stream_column="stream_id",
+ max_value=self._group_updates_id_gen.get_current_token(),
+ limit=1000,
+ )
+ self._group_updates_stream_cache = StreamChangeCache(
+ "_group_updates_stream_cache",
+ min_group_updates_id,
+ prefilled_cache=_group_updates_prefill,
+ )
+
+ self._stream_order_on_start = self.get_room_max_stream_ordering()
+ self._min_stream_order_on_start = self.get_room_min_stream_ordering()
+
+ # Used in _generate_user_daily_visits to keep track of progress
+ self._last_user_visit_update = self._get_start_of_day()
+
+ super(DataStore, self).__init__(db_conn, hs)
+
+ def take_presence_startup_info(self):
+ active_on_startup = self._presence_on_startup
+ self._presence_on_startup = None
+ return active_on_startup
+
+ def _get_active_presence(self, db_conn):
+ """Fetch non-offline presence from the database so that we can register
+ the appropriate time outs.
+ """
+
+ sql = (
+ "SELECT user_id, state, last_active_ts, last_federation_update_ts,"
+ " last_user_sync_ts, status_msg, currently_active FROM presence_stream"
+ " WHERE state != ?"
+ )
+ sql = self.database_engine.convert_param_style(sql)
+
+ txn = db_conn.cursor()
+ txn.execute(sql, (PresenceState.OFFLINE,))
+ rows = self.cursor_to_dict(txn)
+ txn.close()
+
+ for row in rows:
+ row["currently_active"] = bool(row["currently_active"])
+
+ return [UserPresenceState(**row) for row in rows]
+
+ def count_daily_users(self):
+ """
+ Counts the number of users who used this homeserver in the last 24 hours.
+ """
+ yesterday = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24)
+ return self.runInteraction("count_daily_users", self._count_users, yesterday)
+
+ def count_monthly_users(self):
+ """
+ Counts the number of users who used this homeserver in the last 30 days.
+ Note this method is intended for phonehome metrics only and is different
+ from the mau figure in synapse.storage.monthly_active_users which,
+ amongst other things, includes a 3 day grace period before a user counts.
+ """
+ thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30)
+ return self.runInteraction(
+ "count_monthly_users", self._count_users, thirty_days_ago
+ )
+
+ def _count_users(self, txn, time_from):
+ """
+ Returns number of users seen in the past time_from period
+ """
+ sql = """
+ SELECT COALESCE(count(*), 0) FROM (
+ SELECT user_id FROM user_ips
+ WHERE last_seen > ?
+ GROUP BY user_id
+ ) u
+ """
+ txn.execute(sql, (time_from,))
+ count, = txn.fetchone()
+ return count
+
+ def count_r30_users(self):
+ """
+ Counts the number of 30 day retained users, defined as:-
+ * Users who have created their accounts more than 30 days ago
+ * Where last seen at most 30 days ago
+ * Where account creation and last_seen are > 30 days apart
+
+ Returns counts globaly for a given user as well as breaking
+ by platform
+ """
+
+ def _count_r30_users(txn):
+ thirty_days_in_secs = 86400 * 30
+ now = int(self._clock.time())
+ thirty_days_ago_in_secs = now - thirty_days_in_secs
+
+ sql = """
+ SELECT platform, COALESCE(count(*), 0) FROM (
+ SELECT
+ users.name, platform, users.creation_ts * 1000,
+ MAX(uip.last_seen)
+ FROM users
+ INNER JOIN (
+ SELECT
+ user_id,
+ last_seen,
+ CASE
+ WHEN user_agent LIKE '%%Android%%' THEN 'android'
+ WHEN user_agent LIKE '%%iOS%%' THEN 'ios'
+ WHEN user_agent LIKE '%%Electron%%' THEN 'electron'
+ WHEN user_agent LIKE '%%Mozilla%%' THEN 'web'
+ WHEN user_agent LIKE '%%Gecko%%' THEN 'web'
+ ELSE 'unknown'
+ END
+ AS platform
+ FROM user_ips
+ ) uip
+ ON users.name = uip.user_id
+ AND users.appservice_id is NULL
+ AND users.creation_ts < ?
+ AND uip.last_seen/1000 > ?
+ AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
+ GROUP BY users.name, platform, users.creation_ts
+ ) u GROUP BY platform
+ """
+
+ results = {}
+ txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs))
+
+ for row in txn:
+ if row[0] == "unknown":
+ pass
+ results[row[0]] = row[1]
+
+ sql = """
+ SELECT COALESCE(count(*), 0) FROM (
+ SELECT users.name, users.creation_ts * 1000,
+ MAX(uip.last_seen)
+ FROM users
+ INNER JOIN (
+ SELECT
+ user_id,
+ last_seen
+ FROM user_ips
+ ) uip
+ ON users.name = uip.user_id
+ AND appservice_id is NULL
+ AND users.creation_ts < ?
+ AND uip.last_seen/1000 > ?
+ AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
+ GROUP BY users.name, users.creation_ts
+ ) u
+ """
+
+ txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs))
+
+ count, = txn.fetchone()
+ results["all"] = count
+
+ return results
+
+ return self.runInteraction("count_r30_users", _count_r30_users)
+
+ def _get_start_of_day(self):
+ """
+ Returns millisecond unixtime for start of UTC day.
+ """
+ now = time.gmtime()
+ today_start = calendar.timegm((now.tm_year, now.tm_mon, now.tm_mday, 0, 0, 0))
+ return today_start * 1000
+
+ def generate_user_daily_visits(self):
+ """
+ Generates daily visit data for use in cohort/ retention analysis
+ """
+
+ def _generate_user_daily_visits(txn):
+ logger.info("Calling _generate_user_daily_visits")
+ today_start = self._get_start_of_day()
+ a_day_in_milliseconds = 24 * 60 * 60 * 1000
+ now = self.clock.time_msec()
+
+ sql = """
+ INSERT INTO user_daily_visits (user_id, device_id, timestamp)
+ SELECT u.user_id, u.device_id, ?
+ FROM user_ips AS u
+ LEFT JOIN (
+ SELECT user_id, device_id, timestamp FROM user_daily_visits
+ WHERE timestamp = ?
+ ) udv
+ ON u.user_id = udv.user_id AND u.device_id=udv.device_id
+ INNER JOIN users ON users.name=u.user_id
+ WHERE last_seen > ? AND last_seen <= ?
+ AND udv.timestamp IS NULL AND users.is_guest=0
+ AND users.appservice_id IS NULL
+ GROUP BY u.user_id, u.device_id
+ """
+
+ # This means that the day has rolled over but there could still
+ # be entries from the previous day. There is an edge case
+ # where if the user logs in at 23:59 and overwrites their
+ # last_seen at 00:01 then they will not be counted in the
+ # previous day's stats - it is important that the query is run
+ # often to minimise this case.
+ if today_start > self._last_user_visit_update:
+ yesterday_start = today_start - a_day_in_milliseconds
+ txn.execute(
+ sql,
+ (
+ yesterday_start,
+ yesterday_start,
+ self._last_user_visit_update,
+ today_start,
+ ),
+ )
+ self._last_user_visit_update = today_start
+
+ txn.execute(
+ sql, (today_start, today_start, self._last_user_visit_update, now)
+ )
+ # Update _last_user_visit_update to now. The reason to do this
+ # rather just clamping to the beginning of the day is to limit
+ # the size of the join - meaning that the query can be run more
+ # frequently
+ self._last_user_visit_update = now
+
+ return self.runInteraction(
+ "generate_user_daily_visits", _generate_user_daily_visits
+ )
+
+ def get_users(self):
+ """Function to reterive a list of users in users table.
+
+ Args:
+ Returns:
+ defer.Deferred: resolves to list[dict[str, Any]]
+ """
+ return self._simple_select_list(
+ table="users",
+ keyvalues={},
+ retcols=["name", "password_hash", "is_guest", "admin", "user_type"],
+ desc="get_users",
+ )
+
+ @defer.inlineCallbacks
+ def get_users_paginate(self, order, start, limit):
+ """Function to reterive a paginated list of users from
+ users list. This will return a json object, which contains
+ list of users and the total number of users in users table.
+
+ Args:
+ order (str): column name to order the select by this column
+ start (int): start number to begin the query from
+ limit (int): number of rows to reterive
+ Returns:
+ defer.Deferred: resolves to json object {list[dict[str, Any]], count}
+ """
+ users = yield self.runInteraction(
+ "get_users_paginate",
+ self._simple_select_list_paginate_txn,
+ table="users",
+ keyvalues={"is_guest": False},
+ orderby=order,
+ start=start,
+ limit=limit,
+ retcols=["name", "password_hash", "is_guest", "admin", "user_type"],
+ )
+ count = yield self.runInteraction("get_users_paginate", self.get_user_count_txn)
+ retval = {"users": users, "total": count}
+ return retval
+
+ def search_users(self, term):
+ """Function to search users list for one or more users with
+ the matched term.
+
+ Args:
+ term (str): search term
+ col (str): column to query term should be matched to
+ Returns:
+ defer.Deferred: resolves to list[dict[str, Any]]
+ """
+ return self._simple_search_list(
+ table="users",
+ term=term,
+ col="name",
+ retcols=["name", "password_hash", "is_guest", "admin", "user_type"],
+ desc="search_users",
+ )
diff --git a/synapse/storage/account_data.py b/synapse/storage/data_stores/main/account_data.py
index 6afbfc0d74..6afbfc0d74 100644
--- a/synapse/storage/account_data.py
+++ b/synapse/storage/data_stores/main/account_data.py
diff --git a/synapse/storage/appservice.py b/synapse/storage/data_stores/main/appservice.py
index 435b2acd4d..81babf2029 100644
--- a/synapse/storage/appservice.py
+++ b/synapse/storage/data_stores/main/appservice.py
@@ -22,9 +22,8 @@ from twisted.internet import defer
from synapse.appservice import AppServiceTransaction
from synapse.config.appservice import load_appservices
-from synapse.storage.events_worker import EventsWorkerStore
-
-from ._base import SQLBaseStore
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.data_stores.main.events_worker import EventsWorkerStore
logger = logging.getLogger(__name__)
diff --git a/synapse/storage/client_ips.py b/synapse/storage/data_stores/main/client_ips.py
index 067820a5da..706c6a1f3f 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/data_stores/main/client_ips.py
@@ -20,11 +20,10 @@ from six import iteritems
from twisted.internet import defer
from synapse.metrics.background_process_metrics import wrap_as_background_process
+from synapse.storage import background_updates
+from synapse.storage._base import Cache
from synapse.util.caches import CACHE_SIZE_FACTOR
-from . import background_updates
-from ._base import Cache
-
logger = logging.getLogger(__name__)
# Number of msec of granularity to store the user IP 'last seen' time. Smaller
diff --git a/synapse/storage/deviceinbox.py b/synapse/storage/data_stores/main/deviceinbox.py
index f04aad0743..f04aad0743 100644
--- a/synapse/storage/deviceinbox.py
+++ b/synapse/storage/data_stores/main/deviceinbox.py
diff --git a/synapse/storage/devices.py b/synapse/storage/data_stores/main/devices.py
index ac5239e50a..ac5239e50a 100644
--- a/synapse/storage/devices.py
+++ b/synapse/storage/data_stores/main/devices.py
diff --git a/synapse/storage/directory.py b/synapse/storage/data_stores/main/directory.py
index eed7757ed5..297966d9f4 100644
--- a/synapse/storage/directory.py
+++ b/synapse/storage/data_stores/main/directory.py
@@ -18,10 +18,9 @@ from collections import namedtuple
from twisted.internet import defer
from synapse.api.errors import SynapseError
+from synapse.storage._base import SQLBaseStore
from synapse.util.caches.descriptors import cached
-from ._base import SQLBaseStore
-
RoomAliasMapping = namedtuple("RoomAliasMapping", ("room_id", "room_alias", "servers"))
diff --git a/synapse/storage/e2e_room_keys.py b/synapse/storage/data_stores/main/e2e_room_keys.py
index be2fe2bab6..ef88e79293 100644
--- a/synapse/storage/e2e_room_keys.py
+++ b/synapse/storage/data_stores/main/e2e_room_keys.py
@@ -19,8 +19,7 @@ from twisted.internet import defer
from synapse.api.errors import StoreError
from synapse.logging.opentracing import log_kv, trace
-
-from ._base import SQLBaseStore
+from synapse.storage._base import SQLBaseStore
class EndToEndRoomKeyStore(SQLBaseStore):
diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/data_stores/main/end_to_end_keys.py
index 872bc75490..7e44f41046 100644
--- a/synapse/storage/end_to_end_keys.py
+++ b/synapse/storage/data_stores/main/end_to_end_keys.py
@@ -19,10 +19,9 @@ from canonicaljson import encode_canonical_json
from twisted.internet import defer
from synapse.logging.opentracing import log_kv, set_tag, trace
+from synapse.storage._base import SQLBaseStore, db_to_json
from synapse.util.caches.descriptors import cached
-from ._base import SQLBaseStore, db_to_json
-
class EndToEndKeyWorkerStore(SQLBaseStore):
@trace
diff --git a/synapse/storage/event_federation.py b/synapse/storage/data_stores/main/event_federation.py
index 47cc10d32a..a470a48e0f 100644
--- a/synapse/storage/event_federation.py
+++ b/synapse/storage/data_stores/main/event_federation.py
@@ -26,8 +26,8 @@ from twisted.internet import defer
from synapse.api.errors import StoreError
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause
-from synapse.storage.events_worker import EventsWorkerStore
-from synapse.storage.signatures import SignatureWorkerStore
+from synapse.storage.data_stores.main.events_worker import EventsWorkerStore
+from synapse.storage.data_stores.main.signatures import SignatureWorkerStore
from synapse.util.caches.descriptors import cached
logger = logging.getLogger(__name__)
diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/data_stores/main/event_push_actions.py
index 22025effbc..22025effbc 100644
--- a/synapse/storage/event_push_actions.py
+++ b/synapse/storage/data_stores/main/event_push_actions.py
diff --git a/synapse/storage/events.py b/synapse/storage/data_stores/main/events.py
index ee49ef235d..03b5111c5d 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/data_stores/main/events.py
@@ -41,9 +41,9 @@ from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.state import StateResolutionStore
from synapse.storage._base import make_in_list_sql_clause
from synapse.storage.background_updates import BackgroundUpdateStore
-from synapse.storage.event_federation import EventFederationStore
-from synapse.storage.events_worker import EventsWorkerStore
-from synapse.storage.state import StateGroupWorkerStore
+from synapse.storage.data_stores.main.event_federation import EventFederationStore
+from synapse.storage.data_stores.main.events_worker import EventsWorkerStore
+from synapse.storage.data_stores.main.state import StateGroupWorkerStore
from synapse.types import RoomStreamToken, get_domain_from_id
from synapse.util import batch_iter
from synapse.util.async_helpers import ObservableDeferred
diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py
index 31ea6f917f..31ea6f917f 100644
--- a/synapse/storage/events_bg_updates.py
+++ b/synapse/storage/data_stores/main/events_bg_updates.py
diff --git a/synapse/storage/events_worker.py b/synapse/storage/data_stores/main/events_worker.py
index 4c4b76bd93..4c4b76bd93 100644
--- a/synapse/storage/events_worker.py
+++ b/synapse/storage/data_stores/main/events_worker.py
diff --git a/synapse/storage/filtering.py b/synapse/storage/data_stores/main/filtering.py
index 7c2a7da836..a2a2a67927 100644
--- a/synapse/storage/filtering.py
+++ b/synapse/storage/data_stores/main/filtering.py
@@ -16,10 +16,9 @@
from canonicaljson import encode_canonical_json
from synapse.api.errors import Codes, SynapseError
+from synapse.storage._base import SQLBaseStore, db_to_json
from synapse.util.caches.descriptors import cachedInlineCallbacks
-from ._base import SQLBaseStore, db_to_json
-
class FilteringStore(SQLBaseStore):
@cachedInlineCallbacks(num_args=2)
diff --git a/synapse/storage/group_server.py b/synapse/storage/data_stores/main/group_server.py
index 15b01c6958..aeae5a2b28 100644
--- a/synapse/storage/group_server.py
+++ b/synapse/storage/data_stores/main/group_server.py
@@ -19,8 +19,7 @@ from canonicaljson import json
from twisted.internet import defer
from synapse.api.errors import SynapseError
-
-from ._base import SQLBaseStore
+from synapse.storage._base import SQLBaseStore
# The category ID for the "default" category. We don't store as null in the
# database to avoid the fun of null != null
diff --git a/synapse/storage/data_stores/main/keys.py b/synapse/storage/data_stores/main/keys.py
new file mode 100644
index 0000000000..ebc7db3ed6
--- /dev/null
+++ b/synapse/storage/data_stores/main/keys.py
@@ -0,0 +1,214 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2019 New Vector Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import itertools
+import logging
+
+import six
+
+from signedjson.key import decode_verify_key_bytes
+
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.keys import FetchKeyResult
+from synapse.util import batch_iter
+from synapse.util.caches.descriptors import cached, cachedList
+
+logger = logging.getLogger(__name__)
+
+# py2 sqlite has buffer hardcoded as only binary type, so we must use it,
+# despite being deprecated and removed in favor of memoryview
+if six.PY2:
+ db_binary_type = six.moves.builtins.buffer
+else:
+ db_binary_type = memoryview
+
+
+class KeyStore(SQLBaseStore):
+ """Persistence for signature verification keys
+ """
+
+ @cached()
+ def _get_server_verify_key(self, server_name_and_key_id):
+ raise NotImplementedError()
+
+ @cachedList(
+ cached_method_name="_get_server_verify_key", list_name="server_name_and_key_ids"
+ )
+ def get_server_verify_keys(self, server_name_and_key_ids):
+ """
+ Args:
+ server_name_and_key_ids (iterable[Tuple[str, str]]):
+ iterable of (server_name, key-id) tuples to fetch keys for
+
+ Returns:
+ Deferred: resolves to dict[Tuple[str, str], FetchKeyResult|None]:
+ map from (server_name, key_id) -> FetchKeyResult, or None if the key is
+ unknown
+ """
+ keys = {}
+
+ def _get_keys(txn, batch):
+ """Processes a batch of keys to fetch, and adds the result to `keys`."""
+
+ # batch_iter always returns tuples so it's safe to do len(batch)
+ sql = (
+ "SELECT server_name, key_id, verify_key, ts_valid_until_ms "
+ "FROM server_signature_keys WHERE 1=0"
+ ) + " OR (server_name=? AND key_id=?)" * len(batch)
+
+ txn.execute(sql, tuple(itertools.chain.from_iterable(batch)))
+
+ for row in txn:
+ server_name, key_id, key_bytes, ts_valid_until_ms = row
+
+ if ts_valid_until_ms is None:
+ # Old keys may be stored with a ts_valid_until_ms of null,
+ # in which case we treat this as if it was set to `0`, i.e.
+ # it won't match key requests that define a minimum
+ # `ts_valid_until_ms`.
+ ts_valid_until_ms = 0
+
+ res = FetchKeyResult(
+ verify_key=decode_verify_key_bytes(key_id, bytes(key_bytes)),
+ valid_until_ts=ts_valid_until_ms,
+ )
+ keys[(server_name, key_id)] = res
+
+ def _txn(txn):
+ for batch in batch_iter(server_name_and_key_ids, 50):
+ _get_keys(txn, batch)
+ return keys
+
+ return self.runInteraction("get_server_verify_keys", _txn)
+
+ def store_server_verify_keys(self, from_server, ts_added_ms, verify_keys):
+ """Stores NACL verification keys for remote servers.
+ Args:
+ from_server (str): Where the verification keys were looked up
+ ts_added_ms (int): The time to record that the key was added
+ verify_keys (iterable[tuple[str, str, FetchKeyResult]]):
+ keys to be stored. Each entry is a triplet of
+ (server_name, key_id, key).
+ """
+ key_values = []
+ value_values = []
+ invalidations = []
+ for server_name, key_id, fetch_result in verify_keys:
+ key_values.append((server_name, key_id))
+ value_values.append(
+ (
+ from_server,
+ ts_added_ms,
+ fetch_result.valid_until_ts,
+ db_binary_type(fetch_result.verify_key.encode()),
+ )
+ )
+ # invalidate takes a tuple corresponding to the params of
+ # _get_server_verify_key. _get_server_verify_key only takes one
+ # param, which is itself the 2-tuple (server_name, key_id).
+ invalidations.append((server_name, key_id))
+
+ def _invalidate(res):
+ f = self._get_server_verify_key.invalidate
+ for i in invalidations:
+ f((i,))
+ return res
+
+ return self.runInteraction(
+ "store_server_verify_keys",
+ self._simple_upsert_many_txn,
+ table="server_signature_keys",
+ key_names=("server_name", "key_id"),
+ key_values=key_values,
+ value_names=(
+ "from_server",
+ "ts_added_ms",
+ "ts_valid_until_ms",
+ "verify_key",
+ ),
+ value_values=value_values,
+ ).addCallback(_invalidate)
+
+ def store_server_keys_json(
+ self, server_name, key_id, from_server, ts_now_ms, ts_expires_ms, key_json_bytes
+ ):
+ """Stores the JSON bytes for a set of keys from a server
+ The JSON should be signed by the originating server, the intermediate
+ server, and by this server. Updates the value for the
+ (server_name, key_id, from_server) triplet if one already existed.
+ Args:
+ server_name (str): The name of the server.
+ key_id (str): The identifer of the key this JSON is for.
+ from_server (str): The server this JSON was fetched from.
+ ts_now_ms (int): The time now in milliseconds.
+ ts_valid_until_ms (int): The time when this json stops being valid.
+ key_json (bytes): The encoded JSON.
+ """
+ return self._simple_upsert(
+ table="server_keys_json",
+ keyvalues={
+ "server_name": server_name,
+ "key_id": key_id,
+ "from_server": from_server,
+ },
+ values={
+ "server_name": server_name,
+ "key_id": key_id,
+ "from_server": from_server,
+ "ts_added_ms": ts_now_ms,
+ "ts_valid_until_ms": ts_expires_ms,
+ "key_json": db_binary_type(key_json_bytes),
+ },
+ desc="store_server_keys_json",
+ )
+
+ def get_server_keys_json(self, server_keys):
+ """Retrive the key json for a list of server_keys and key ids.
+ If no keys are found for a given server, key_id and source then
+ that server, key_id, and source triplet entry will be an empty list.
+ The JSON is returned as a byte array so that it can be efficiently
+ used in an HTTP response.
+ Args:
+ server_keys (list): List of (server_name, key_id, source) triplets.
+ Returns:
+ Deferred[dict[Tuple[str, str, str|None], list[dict]]]:
+ Dict mapping (server_name, key_id, source) triplets to lists of dicts
+ """
+
+ def _get_server_keys_json_txn(txn):
+ results = {}
+ for server_name, key_id, from_server in server_keys:
+ keyvalues = {"server_name": server_name}
+ if key_id is not None:
+ keyvalues["key_id"] = key_id
+ if from_server is not None:
+ keyvalues["from_server"] = from_server
+ rows = self._simple_select_list_txn(
+ txn,
+ "server_keys_json",
+ keyvalues=keyvalues,
+ retcols=(
+ "key_id",
+ "from_server",
+ "ts_added_ms",
+ "ts_valid_until_ms",
+ "key_json",
+ ),
+ )
+ results[(server_name, key_id, from_server)] = rows
+ return results
+
+ return self.runInteraction("get_server_keys_json", _get_server_keys_json_txn)
diff --git a/synapse/storage/media_repository.py b/synapse/storage/data_stores/main/media_repository.py
index 84b5f3ad5e..84b5f3ad5e 100644
--- a/synapse/storage/media_repository.py
+++ b/synapse/storage/data_stores/main/media_repository.py
diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/data_stores/main/monthly_active_users.py
index 3803604be7..e6ee1e4aaa 100644
--- a/synapse/storage/monthly_active_users.py
+++ b/synapse/storage/data_stores/main/monthly_active_users.py
@@ -16,10 +16,9 @@ import logging
from twisted.internet import defer
+from synapse.storage._base import SQLBaseStore
from synapse.util.caches.descriptors import cached
-from ._base import SQLBaseStore
-
logger = logging.getLogger(__name__)
# Number of msec of granularity to store the monthly_active_user timestamp
diff --git a/synapse/storage/openid.py b/synapse/storage/data_stores/main/openid.py
index b3318045ee..79b40044d9 100644
--- a/synapse/storage/openid.py
+++ b/synapse/storage/data_stores/main/openid.py
@@ -1,4 +1,4 @@
-from ._base import SQLBaseStore
+from synapse.storage._base import SQLBaseStore
class OpenIdStore(SQLBaseStore):
diff --git a/synapse/storage/data_stores/main/presence.py b/synapse/storage/data_stores/main/presence.py
new file mode 100644
index 0000000000..523ed6575e
--- /dev/null
+++ b/synapse/storage/data_stores/main/presence.py
@@ -0,0 +1,150 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from twisted.internet import defer
+
+from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause
+from synapse.storage.presence import UserPresenceState
+from synapse.util import batch_iter
+from synapse.util.caches.descriptors import cached, cachedList
+
+
+class PresenceStore(SQLBaseStore):
+ @defer.inlineCallbacks
+ def update_presence(self, presence_states):
+ stream_ordering_manager = self._presence_id_gen.get_next_mult(
+ len(presence_states)
+ )
+
+ with stream_ordering_manager as stream_orderings:
+ yield self.runInteraction(
+ "update_presence",
+ self._update_presence_txn,
+ stream_orderings,
+ presence_states,
+ )
+
+ return stream_orderings[-1], self._presence_id_gen.get_current_token()
+
+ def _update_presence_txn(self, txn, stream_orderings, presence_states):
+ for stream_id, state in zip(stream_orderings, presence_states):
+ txn.call_after(
+ self.presence_stream_cache.entity_has_changed, state.user_id, stream_id
+ )
+ txn.call_after(self._get_presence_for_user.invalidate, (state.user_id,))
+
+ # Actually insert new rows
+ self._simple_insert_many_txn(
+ txn,
+ table="presence_stream",
+ values=[
+ {
+ "stream_id": stream_id,
+ "user_id": state.user_id,
+ "state": state.state,
+ "last_active_ts": state.last_active_ts,
+ "last_federation_update_ts": state.last_federation_update_ts,
+ "last_user_sync_ts": state.last_user_sync_ts,
+ "status_msg": state.status_msg,
+ "currently_active": state.currently_active,
+ }
+ for state in presence_states
+ ],
+ )
+
+ # Delete old rows to stop database from getting really big
+ sql = "DELETE FROM presence_stream WHERE stream_id < ? AND "
+
+ for states in batch_iter(presence_states, 50):
+ clause, args = make_in_list_sql_clause(
+ self.database_engine, "user_id", [s.user_id for s in states]
+ )
+ txn.execute(sql + clause, [stream_id] + list(args))
+
+ def get_all_presence_updates(self, last_id, current_id):
+ if last_id == current_id:
+ return defer.succeed([])
+
+ def get_all_presence_updates_txn(txn):
+ sql = (
+ "SELECT stream_id, user_id, state, last_active_ts,"
+ " last_federation_update_ts, last_user_sync_ts, status_msg,"
+ " currently_active"
+ " FROM presence_stream"
+ " WHERE ? < stream_id AND stream_id <= ?"
+ )
+ txn.execute(sql, (last_id, current_id))
+ return txn.fetchall()
+
+ return self.runInteraction(
+ "get_all_presence_updates", get_all_presence_updates_txn
+ )
+
+ @cached()
+ def _get_presence_for_user(self, user_id):
+ raise NotImplementedError()
+
+ @cachedList(
+ cached_method_name="_get_presence_for_user",
+ list_name="user_ids",
+ num_args=1,
+ inlineCallbacks=True,
+ )
+ def get_presence_for_users(self, user_ids):
+ rows = yield self._simple_select_many_batch(
+ table="presence_stream",
+ column="user_id",
+ iterable=user_ids,
+ keyvalues={},
+ retcols=(
+ "user_id",
+ "state",
+ "last_active_ts",
+ "last_federation_update_ts",
+ "last_user_sync_ts",
+ "status_msg",
+ "currently_active",
+ ),
+ desc="get_presence_for_users",
+ )
+
+ for row in rows:
+ row["currently_active"] = bool(row["currently_active"])
+
+ return {row["user_id"]: UserPresenceState(**row) for row in rows}
+
+ def get_current_presence_token(self):
+ return self._presence_id_gen.get_current_token()
+
+ def allow_presence_visible(self, observed_localpart, observer_userid):
+ return self._simple_insert(
+ table="presence_allow_inbound",
+ values={
+ "observed_user_id": observed_localpart,
+ "observer_user_id": observer_userid,
+ },
+ desc="allow_presence_visible",
+ or_ignore=True,
+ )
+
+ def disallow_presence_visible(self, observed_localpart, observer_userid):
+ return self._simple_delete_one(
+ table="presence_allow_inbound",
+ keyvalues={
+ "observed_user_id": observed_localpart,
+ "observer_user_id": observer_userid,
+ },
+ desc="disallow_presence_visible",
+ )
diff --git a/synapse/storage/profile.py b/synapse/storage/data_stores/main/profile.py
index 912c1df6be..e4e8a1c1d6 100644
--- a/synapse/storage/profile.py
+++ b/synapse/storage/data_stores/main/profile.py
@@ -16,9 +16,8 @@
from twisted.internet import defer
from synapse.api.errors import StoreError
-from synapse.storage.roommember import ProfileInfo
-
-from ._base import SQLBaseStore
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.data_stores.main.roommember import ProfileInfo
class ProfileWorkerStore(SQLBaseStore):
diff --git a/synapse/storage/data_stores/main/push_rule.py b/synapse/storage/data_stores/main/push_rule.py
new file mode 100644
index 0000000000..cd95f1ce60
--- /dev/null
+++ b/synapse/storage/data_stores/main/push_rule.py
@@ -0,0 +1,713 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import abc
+import logging
+
+from canonicaljson import json
+
+from twisted.internet import defer
+
+from synapse.push.baserules import list_with_base_rules
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.data_stores.main.appservice import ApplicationServiceWorkerStore
+from synapse.storage.data_stores.main.pusher import PusherWorkerStore
+from synapse.storage.data_stores.main.receipts import ReceiptsWorkerStore
+from synapse.storage.data_stores.main.roommember import RoomMemberWorkerStore
+from synapse.storage.push_rule import InconsistentRuleException, RuleNotFoundException
+from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList
+from synapse.util.caches.stream_change_cache import StreamChangeCache
+
+logger = logging.getLogger(__name__)
+
+
+def _load_rules(rawrules, enabled_map):
+ ruleslist = []
+ for rawrule in rawrules:
+ rule = dict(rawrule)
+ rule["conditions"] = json.loads(rawrule["conditions"])
+ rule["actions"] = json.loads(rawrule["actions"])
+ ruleslist.append(rule)
+
+ # We're going to be mutating this a lot, so do a deep copy
+ rules = list(list_with_base_rules(ruleslist))
+
+ for i, rule in enumerate(rules):
+ rule_id = rule["rule_id"]
+ if rule_id in enabled_map:
+ if rule.get("enabled", True) != bool(enabled_map[rule_id]):
+ # Rules are cached across users.
+ rule = dict(rule)
+ rule["enabled"] = bool(enabled_map[rule_id])
+ rules[i] = rule
+
+ return rules
+
+
+class PushRulesWorkerStore(
+ ApplicationServiceWorkerStore,
+ ReceiptsWorkerStore,
+ PusherWorkerStore,
+ RoomMemberWorkerStore,
+ SQLBaseStore,
+):
+ """This is an abstract base class where subclasses must implement
+ `get_max_push_rules_stream_id` which can be called in the initializer.
+ """
+
+ # This ABCMeta metaclass ensures that we cannot be instantiated without
+ # the abstract methods being implemented.
+ __metaclass__ = abc.ABCMeta
+
+ def __init__(self, db_conn, hs):
+ super(PushRulesWorkerStore, self).__init__(db_conn, hs)
+
+ push_rules_prefill, push_rules_id = self._get_cache_dict(
+ db_conn,
+ "push_rules_stream",
+ entity_column="user_id",
+ stream_column="stream_id",
+ max_value=self.get_max_push_rules_stream_id(),
+ )
+
+ self.push_rules_stream_cache = StreamChangeCache(
+ "PushRulesStreamChangeCache",
+ push_rules_id,
+ prefilled_cache=push_rules_prefill,
+ )
+
+ @abc.abstractmethod
+ def get_max_push_rules_stream_id(self):
+ """Get the position of the push rules stream.
+
+ Returns:
+ int
+ """
+ raise NotImplementedError()
+
+ @cachedInlineCallbacks(max_entries=5000)
+ def get_push_rules_for_user(self, user_id):
+ rows = yield self._simple_select_list(
+ table="push_rules",
+ keyvalues={"user_name": user_id},
+ retcols=(
+ "user_name",
+ "rule_id",
+ "priority_class",
+ "priority",
+ "conditions",
+ "actions",
+ ),
+ desc="get_push_rules_enabled_for_user",
+ )
+
+ rows.sort(key=lambda row: (-int(row["priority_class"]), -int(row["priority"])))
+
+ enabled_map = yield self.get_push_rules_enabled_for_user(user_id)
+
+ rules = _load_rules(rows, enabled_map)
+
+ return rules
+
+ @cachedInlineCallbacks(max_entries=5000)
+ def get_push_rules_enabled_for_user(self, user_id):
+ results = yield self._simple_select_list(
+ table="push_rules_enable",
+ keyvalues={"user_name": user_id},
+ retcols=("user_name", "rule_id", "enabled"),
+ desc="get_push_rules_enabled_for_user",
+ )
+ return {r["rule_id"]: False if r["enabled"] == 0 else True for r in results}
+
+ def have_push_rules_changed_for_user(self, user_id, last_id):
+ if not self.push_rules_stream_cache.has_entity_changed(user_id, last_id):
+ return defer.succeed(False)
+ else:
+
+ def have_push_rules_changed_txn(txn):
+ sql = (
+ "SELECT COUNT(stream_id) FROM push_rules_stream"
+ " WHERE user_id = ? AND ? < stream_id"
+ )
+ txn.execute(sql, (user_id, last_id))
+ count, = txn.fetchone()
+ return bool(count)
+
+ return self.runInteraction(
+ "have_push_rules_changed", have_push_rules_changed_txn
+ )
+
+ @cachedList(
+ cached_method_name="get_push_rules_for_user",
+ list_name="user_ids",
+ num_args=1,
+ inlineCallbacks=True,
+ )
+ def bulk_get_push_rules(self, user_ids):
+ if not user_ids:
+ return {}
+
+ results = {user_id: [] for user_id in user_ids}
+
+ rows = yield self._simple_select_many_batch(
+ table="push_rules",
+ column="user_name",
+ iterable=user_ids,
+ retcols=("*",),
+ desc="bulk_get_push_rules",
+ )
+
+ rows.sort(key=lambda row: (-int(row["priority_class"]), -int(row["priority"])))
+
+ for row in rows:
+ results.setdefault(row["user_name"], []).append(row)
+
+ enabled_map_by_user = yield self.bulk_get_push_rules_enabled(user_ids)
+
+ for user_id, rules in results.items():
+ results[user_id] = _load_rules(rules, enabled_map_by_user.get(user_id, {}))
+
+ return results
+
+ @defer.inlineCallbacks
+ def copy_push_rule_from_room_to_room(self, new_room_id, user_id, rule):
+ """Copy a single push rule from one room to another for a specific user.
+
+ Args:
+ new_room_id (str): ID of the new room.
+ user_id (str): ID of user the push rule belongs to.
+ rule (Dict): A push rule.
+ """
+ # Create new rule id
+ rule_id_scope = "/".join(rule["rule_id"].split("/")[:-1])
+ new_rule_id = rule_id_scope + "/" + new_room_id
+
+ # Change room id in each condition
+ for condition in rule.get("conditions", []):
+ if condition.get("key") == "room_id":
+ condition["pattern"] = new_room_id
+
+ # Add the rule for the new room
+ yield self.add_push_rule(
+ user_id=user_id,
+ rule_id=new_rule_id,
+ priority_class=rule["priority_class"],
+ conditions=rule["conditions"],
+ actions=rule["actions"],
+ )
+
+ @defer.inlineCallbacks
+ def copy_push_rules_from_room_to_room_for_user(
+ self, old_room_id, new_room_id, user_id
+ ):
+ """Copy all of the push rules from one room to another for a specific
+ user.
+
+ Args:
+ old_room_id (str): ID of the old room.
+ new_room_id (str): ID of the new room.
+ user_id (str): ID of user to copy push rules for.
+ """
+ # Retrieve push rules for this user
+ user_push_rules = yield self.get_push_rules_for_user(user_id)
+
+ # Get rules relating to the old room and copy them to the new room
+ for rule in user_push_rules:
+ conditions = rule.get("conditions", [])
+ if any(
+ (c.get("key") == "room_id" and c.get("pattern") == old_room_id)
+ for c in conditions
+ ):
+ yield self.copy_push_rule_from_room_to_room(new_room_id, user_id, rule)
+
+ @defer.inlineCallbacks
+ def bulk_get_push_rules_for_room(self, event, context):
+ state_group = context.state_group
+ if not state_group:
+ # If state_group is None it means it has yet to be assigned a
+ # state group, i.e. we need to make sure that calls with a state_group
+ # of None don't hit previous cached calls with a None state_group.
+ # To do this we set the state_group to a new object as object() != object()
+ state_group = object()
+
+ current_state_ids = yield context.get_current_state_ids(self)
+ result = yield self._bulk_get_push_rules_for_room(
+ event.room_id, state_group, current_state_ids, event=event
+ )
+ return result
+
+ @cachedInlineCallbacks(num_args=2, cache_context=True)
+ def _bulk_get_push_rules_for_room(
+ self, room_id, state_group, current_state_ids, cache_context, event=None
+ ):
+ # We don't use `state_group`, its there so that we can cache based
+ # on it. However, its important that its never None, since two current_state's
+ # with a state_group of None are likely to be different.
+ # See bulk_get_push_rules_for_room for how we work around this.
+ assert state_group is not None
+
+ # We also will want to generate notifs for other people in the room so
+ # their unread countss are correct in the event stream, but to avoid
+ # generating them for bot / AS users etc, we only do so for people who've
+ # sent a read receipt into the room.
+
+ users_in_room = yield self._get_joined_users_from_context(
+ room_id,
+ state_group,
+ current_state_ids,
+ on_invalidate=cache_context.invalidate,
+ event=event,
+ )
+
+ # We ignore app service users for now. This is so that we don't fill
+ # up the `get_if_users_have_pushers` cache with AS entries that we
+ # know don't have pushers, nor even read receipts.
+ local_users_in_room = set(
+ u
+ for u in users_in_room
+ if self.hs.is_mine_id(u)
+ and not self.get_if_app_services_interested_in_user(u)
+ )
+
+ # users in the room who have pushers need to get push rules run because
+ # that's how their pushers work
+ if_users_with_pushers = yield self.get_if_users_have_pushers(
+ local_users_in_room, on_invalidate=cache_context.invalidate
+ )
+ user_ids = set(
+ uid for uid, have_pusher in if_users_with_pushers.items() if have_pusher
+ )
+
+ users_with_receipts = yield self.get_users_with_read_receipts_in_room(
+ room_id, on_invalidate=cache_context.invalidate
+ )
+
+ # any users with pushers must be ours: they have pushers
+ for uid in users_with_receipts:
+ if uid in local_users_in_room:
+ user_ids.add(uid)
+
+ rules_by_user = yield self.bulk_get_push_rules(
+ user_ids, on_invalidate=cache_context.invalidate
+ )
+
+ rules_by_user = {k: v for k, v in rules_by_user.items() if v is not None}
+
+ return rules_by_user
+
+ @cachedList(
+ cached_method_name="get_push_rules_enabled_for_user",
+ list_name="user_ids",
+ num_args=1,
+ inlineCallbacks=True,
+ )
+ def bulk_get_push_rules_enabled(self, user_ids):
+ if not user_ids:
+ return {}
+
+ results = {user_id: {} for user_id in user_ids}
+
+ rows = yield self._simple_select_many_batch(
+ table="push_rules_enable",
+ column="user_name",
+ iterable=user_ids,
+ retcols=("user_name", "rule_id", "enabled"),
+ desc="bulk_get_push_rules_enabled",
+ )
+ for row in rows:
+ enabled = bool(row["enabled"])
+ results.setdefault(row["user_name"], {})[row["rule_id"]] = enabled
+ return results
+
+
+class PushRuleStore(PushRulesWorkerStore):
+ @defer.inlineCallbacks
+ def add_push_rule(
+ self,
+ user_id,
+ rule_id,
+ priority_class,
+ conditions,
+ actions,
+ before=None,
+ after=None,
+ ):
+ conditions_json = json.dumps(conditions)
+ actions_json = json.dumps(actions)
+ with self._push_rules_stream_id_gen.get_next() as ids:
+ stream_id, event_stream_ordering = ids
+ if before or after:
+ yield self.runInteraction(
+ "_add_push_rule_relative_txn",
+ self._add_push_rule_relative_txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ priority_class,
+ conditions_json,
+ actions_json,
+ before,
+ after,
+ )
+ else:
+ yield self.runInteraction(
+ "_add_push_rule_highest_priority_txn",
+ self._add_push_rule_highest_priority_txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ priority_class,
+ conditions_json,
+ actions_json,
+ )
+
+ def _add_push_rule_relative_txn(
+ self,
+ txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ priority_class,
+ conditions_json,
+ actions_json,
+ before,
+ after,
+ ):
+ # Lock the table since otherwise we'll have annoying races between the
+ # SELECT here and the UPSERT below.
+ self.database_engine.lock_table(txn, "push_rules")
+
+ relative_to_rule = before or after
+
+ res = self._simple_select_one_txn(
+ txn,
+ table="push_rules",
+ keyvalues={"user_name": user_id, "rule_id": relative_to_rule},
+ retcols=["priority_class", "priority"],
+ allow_none=True,
+ )
+
+ if not res:
+ raise RuleNotFoundException(
+ "before/after rule not found: %s" % (relative_to_rule,)
+ )
+
+ base_priority_class = res["priority_class"]
+ base_rule_priority = res["priority"]
+
+ if base_priority_class != priority_class:
+ raise InconsistentRuleException(
+ "Given priority class does not match class of relative rule"
+ )
+
+ if before:
+ # Higher priority rules are executed first, So adding a rule before
+ # a rule means giving it a higher priority than that rule.
+ new_rule_priority = base_rule_priority + 1
+ else:
+ # We increment the priority of the existing rules to make space for
+ # the new rule. Therefore if we want this rule to appear after
+ # an existing rule we give it the priority of the existing rule,
+ # and then increment the priority of the existing rule.
+ new_rule_priority = base_rule_priority
+
+ sql = (
+ "UPDATE push_rules SET priority = priority + 1"
+ " WHERE user_name = ? AND priority_class = ? AND priority >= ?"
+ )
+
+ txn.execute(sql, (user_id, priority_class, new_rule_priority))
+
+ self._upsert_push_rule_txn(
+ txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ priority_class,
+ new_rule_priority,
+ conditions_json,
+ actions_json,
+ )
+
+ def _add_push_rule_highest_priority_txn(
+ self,
+ txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ priority_class,
+ conditions_json,
+ actions_json,
+ ):
+ # Lock the table since otherwise we'll have annoying races between the
+ # SELECT here and the UPSERT below.
+ self.database_engine.lock_table(txn, "push_rules")
+
+ # find the highest priority rule in that class
+ sql = (
+ "SELECT COUNT(*), MAX(priority) FROM push_rules"
+ " WHERE user_name = ? and priority_class = ?"
+ )
+ txn.execute(sql, (user_id, priority_class))
+ res = txn.fetchall()
+ (how_many, highest_prio) = res[0]
+
+ new_prio = 0
+ if how_many > 0:
+ new_prio = highest_prio + 1
+
+ self._upsert_push_rule_txn(
+ txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ priority_class,
+ new_prio,
+ conditions_json,
+ actions_json,
+ )
+
+ def _upsert_push_rule_txn(
+ self,
+ txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ priority_class,
+ priority,
+ conditions_json,
+ actions_json,
+ update_stream=True,
+ ):
+ """Specialised version of _simple_upsert_txn that picks a push_rule_id
+ using the _push_rule_id_gen if it needs to insert the rule. It assumes
+ that the "push_rules" table is locked"""
+
+ sql = (
+ "UPDATE push_rules"
+ " SET priority_class = ?, priority = ?, conditions = ?, actions = ?"
+ " WHERE user_name = ? AND rule_id = ?"
+ )
+
+ txn.execute(
+ sql,
+ (priority_class, priority, conditions_json, actions_json, user_id, rule_id),
+ )
+
+ if txn.rowcount == 0:
+ # We didn't update a row with the given rule_id so insert one
+ push_rule_id = self._push_rule_id_gen.get_next()
+
+ self._simple_insert_txn(
+ txn,
+ table="push_rules",
+ values={
+ "id": push_rule_id,
+ "user_name": user_id,
+ "rule_id": rule_id,
+ "priority_class": priority_class,
+ "priority": priority,
+ "conditions": conditions_json,
+ "actions": actions_json,
+ },
+ )
+
+ if update_stream:
+ self._insert_push_rules_update_txn(
+ txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ op="ADD",
+ data={
+ "priority_class": priority_class,
+ "priority": priority,
+ "conditions": conditions_json,
+ "actions": actions_json,
+ },
+ )
+
+ @defer.inlineCallbacks
+ def delete_push_rule(self, user_id, rule_id):
+ """
+ Delete a push rule. Args specify the row to be deleted and can be
+ any of the columns in the push_rule table, but below are the
+ standard ones
+
+ Args:
+ user_id (str): The matrix ID of the push rule owner
+ rule_id (str): The rule_id of the rule to be deleted
+ """
+
+ def delete_push_rule_txn(txn, stream_id, event_stream_ordering):
+ self._simple_delete_one_txn(
+ txn, "push_rules", {"user_name": user_id, "rule_id": rule_id}
+ )
+
+ self._insert_push_rules_update_txn(
+ txn, stream_id, event_stream_ordering, user_id, rule_id, op="DELETE"
+ )
+
+ with self._push_rules_stream_id_gen.get_next() as ids:
+ stream_id, event_stream_ordering = ids
+ yield self.runInteraction(
+ "delete_push_rule",
+ delete_push_rule_txn,
+ stream_id,
+ event_stream_ordering,
+ )
+
+ @defer.inlineCallbacks
+ def set_push_rule_enabled(self, user_id, rule_id, enabled):
+ with self._push_rules_stream_id_gen.get_next() as ids:
+ stream_id, event_stream_ordering = ids
+ yield self.runInteraction(
+ "_set_push_rule_enabled_txn",
+ self._set_push_rule_enabled_txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ enabled,
+ )
+
+ def _set_push_rule_enabled_txn(
+ self, txn, stream_id, event_stream_ordering, user_id, rule_id, enabled
+ ):
+ new_id = self._push_rules_enable_id_gen.get_next()
+ self._simple_upsert_txn(
+ txn,
+ "push_rules_enable",
+ {"user_name": user_id, "rule_id": rule_id},
+ {"enabled": 1 if enabled else 0},
+ {"id": new_id},
+ )
+
+ self._insert_push_rules_update_txn(
+ txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ op="ENABLE" if enabled else "DISABLE",
+ )
+
+ @defer.inlineCallbacks
+ def set_push_rule_actions(self, user_id, rule_id, actions, is_default_rule):
+ actions_json = json.dumps(actions)
+
+ def set_push_rule_actions_txn(txn, stream_id, event_stream_ordering):
+ if is_default_rule:
+ # Add a dummy rule to the rules table with the user specified
+ # actions.
+ priority_class = -1
+ priority = 1
+ self._upsert_push_rule_txn(
+ txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ priority_class,
+ priority,
+ "[]",
+ actions_json,
+ update_stream=False,
+ )
+ else:
+ self._simple_update_one_txn(
+ txn,
+ "push_rules",
+ {"user_name": user_id, "rule_id": rule_id},
+ {"actions": actions_json},
+ )
+
+ self._insert_push_rules_update_txn(
+ txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ op="ACTIONS",
+ data={"actions": actions_json},
+ )
+
+ with self._push_rules_stream_id_gen.get_next() as ids:
+ stream_id, event_stream_ordering = ids
+ yield self.runInteraction(
+ "set_push_rule_actions",
+ set_push_rule_actions_txn,
+ stream_id,
+ event_stream_ordering,
+ )
+
+ def _insert_push_rules_update_txn(
+ self, txn, stream_id, event_stream_ordering, user_id, rule_id, op, data=None
+ ):
+ values = {
+ "stream_id": stream_id,
+ "event_stream_ordering": event_stream_ordering,
+ "user_id": user_id,
+ "rule_id": rule_id,
+ "op": op,
+ }
+ if data is not None:
+ values.update(data)
+
+ self._simple_insert_txn(txn, "push_rules_stream", values=values)
+
+ txn.call_after(self.get_push_rules_for_user.invalidate, (user_id,))
+ txn.call_after(self.get_push_rules_enabled_for_user.invalidate, (user_id,))
+ txn.call_after(
+ self.push_rules_stream_cache.entity_has_changed, user_id, stream_id
+ )
+
+ def get_all_push_rule_updates(self, last_id, current_id, limit):
+ """Get all the push rules changes that have happend on the server"""
+ if last_id == current_id:
+ return defer.succeed([])
+
+ def get_all_push_rule_updates_txn(txn):
+ sql = (
+ "SELECT stream_id, event_stream_ordering, user_id, rule_id,"
+ " op, priority_class, priority, conditions, actions"
+ " FROM push_rules_stream"
+ " WHERE ? < stream_id AND stream_id <= ?"
+ " ORDER BY stream_id ASC LIMIT ?"
+ )
+ txn.execute(sql, (last_id, current_id, limit))
+ return txn.fetchall()
+
+ return self.runInteraction(
+ "get_all_push_rule_updates", get_all_push_rule_updates_txn
+ )
+
+ def get_push_rules_stream_token(self):
+ """Get the position of the push rules stream.
+ Returns a pair of a stream id for the push_rules stream and the
+ room stream ordering it corresponds to."""
+ return self._push_rules_stream_id_gen.get_current_token()
+
+ def get_max_push_rules_stream_id(self):
+ return self.get_push_rules_stream_token()[0]
diff --git a/synapse/storage/pusher.py b/synapse/storage/data_stores/main/pusher.py
index b12e80440a..f005c1ae0a 100644
--- a/synapse/storage/pusher.py
+++ b/synapse/storage/data_stores/main/pusher.py
@@ -22,10 +22,9 @@ from canonicaljson import encode_canonical_json, json
from twisted.internet import defer
+from synapse.storage._base import SQLBaseStore
from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList
-from ._base import SQLBaseStore
-
logger = logging.getLogger(__name__)
if six.PY2:
diff --git a/synapse/storage/receipts.py b/synapse/storage/data_stores/main/receipts.py
index 0c24430f28..0c24430f28 100644
--- a/synapse/storage/receipts.py
+++ b/synapse/storage/data_stores/main/receipts.py
diff --git a/synapse/storage/registration.py b/synapse/storage/data_stores/main/registration.py
index 6c5b29288a..6c5b29288a 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/data_stores/main/registration.py
diff --git a/synapse/storage/rejections.py b/synapse/storage/data_stores/main/rejections.py
index f4c1c2a457..7d5de0ea2e 100644
--- a/synapse/storage/rejections.py
+++ b/synapse/storage/data_stores/main/rejections.py
@@ -15,7 +15,7 @@
import logging
-from ._base import SQLBaseStore
+from synapse.storage._base import SQLBaseStore
logger = logging.getLogger(__name__)
diff --git a/synapse/storage/data_stores/main/relations.py b/synapse/storage/data_stores/main/relations.py
new file mode 100644
index 0000000000..858f65582b
--- /dev/null
+++ b/synapse/storage/data_stores/main/relations.py
@@ -0,0 +1,385 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+import attr
+
+from synapse.api.constants import RelationTypes
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.data_stores.main.stream import generate_pagination_where_clause
+from synapse.storage.relations import (
+ AggregationPaginationToken,
+ PaginationChunk,
+ RelationPaginationToken,
+)
+from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
+
+logger = logging.getLogger(__name__)
+
+
+class RelationsWorkerStore(SQLBaseStore):
+ @cached(tree=True)
+ def get_relations_for_event(
+ self,
+ event_id,
+ relation_type=None,
+ event_type=None,
+ aggregation_key=None,
+ limit=5,
+ direction="b",
+ from_token=None,
+ to_token=None,
+ ):
+ """Get a list of relations for an event, ordered by topological ordering.
+
+ Args:
+ event_id (str): Fetch events that relate to this event ID.
+ relation_type (str|None): Only fetch events with this relation
+ type, if given.
+ event_type (str|None): Only fetch events with this event type, if
+ given.
+ aggregation_key (str|None): Only fetch events with this aggregation
+ key, if given.
+ limit (int): Only fetch the most recent `limit` events.
+ direction (str): Whether to fetch the most recent first (`"b"`) or
+ the oldest first (`"f"`).
+ from_token (RelationPaginationToken|None): Fetch rows from the given
+ token, or from the start if None.
+ to_token (RelationPaginationToken|None): Fetch rows up to the given
+ token, or up to the end if None.
+
+ Returns:
+ Deferred[PaginationChunk]: List of event IDs that match relations
+ requested. The rows are of the form `{"event_id": "..."}`.
+ """
+
+ where_clause = ["relates_to_id = ?"]
+ where_args = [event_id]
+
+ if relation_type is not None:
+ where_clause.append("relation_type = ?")
+ where_args.append(relation_type)
+
+ if event_type is not None:
+ where_clause.append("type = ?")
+ where_args.append(event_type)
+
+ if aggregation_key:
+ where_clause.append("aggregation_key = ?")
+ where_args.append(aggregation_key)
+
+ pagination_clause = generate_pagination_where_clause(
+ direction=direction,
+ column_names=("topological_ordering", "stream_ordering"),
+ from_token=attr.astuple(from_token) if from_token else None,
+ to_token=attr.astuple(to_token) if to_token else None,
+ engine=self.database_engine,
+ )
+
+ if pagination_clause:
+ where_clause.append(pagination_clause)
+
+ if direction == "b":
+ order = "DESC"
+ else:
+ order = "ASC"
+
+ sql = """
+ SELECT event_id, topological_ordering, stream_ordering
+ FROM event_relations
+ INNER JOIN events USING (event_id)
+ WHERE %s
+ ORDER BY topological_ordering %s, stream_ordering %s
+ LIMIT ?
+ """ % (
+ " AND ".join(where_clause),
+ order,
+ order,
+ )
+
+ def _get_recent_references_for_event_txn(txn):
+ txn.execute(sql, where_args + [limit + 1])
+
+ last_topo_id = None
+ last_stream_id = None
+ events = []
+ for row in txn:
+ events.append({"event_id": row[0]})
+ last_topo_id = row[1]
+ last_stream_id = row[2]
+
+ next_batch = None
+ if len(events) > limit and last_topo_id and last_stream_id:
+ next_batch = RelationPaginationToken(last_topo_id, last_stream_id)
+
+ return PaginationChunk(
+ chunk=list(events[:limit]), next_batch=next_batch, prev_batch=from_token
+ )
+
+ return self.runInteraction(
+ "get_recent_references_for_event", _get_recent_references_for_event_txn
+ )
+
+ @cached(tree=True)
+ def get_aggregation_groups_for_event(
+ self,
+ event_id,
+ event_type=None,
+ limit=5,
+ direction="b",
+ from_token=None,
+ to_token=None,
+ ):
+ """Get a list of annotations on the event, grouped by event type and
+ aggregation key, sorted by count.
+
+ This is used e.g. to get the what and how many reactions have happend
+ on an event.
+
+ Args:
+ event_id (str): Fetch events that relate to this event ID.
+ event_type (str|None): Only fetch events with this event type, if
+ given.
+ limit (int): Only fetch the `limit` groups.
+ direction (str): Whether to fetch the highest count first (`"b"`) or
+ the lowest count first (`"f"`).
+ from_token (AggregationPaginationToken|None): Fetch rows from the
+ given token, or from the start if None.
+ to_token (AggregationPaginationToken|None): Fetch rows up to the
+ given token, or up to the end if None.
+
+
+ Returns:
+ Deferred[PaginationChunk]: List of groups of annotations that
+ match. Each row is a dict with `type`, `key` and `count` fields.
+ """
+
+ where_clause = ["relates_to_id = ?", "relation_type = ?"]
+ where_args = [event_id, RelationTypes.ANNOTATION]
+
+ if event_type:
+ where_clause.append("type = ?")
+ where_args.append(event_type)
+
+ having_clause = generate_pagination_where_clause(
+ direction=direction,
+ column_names=("COUNT(*)", "MAX(stream_ordering)"),
+ from_token=attr.astuple(from_token) if from_token else None,
+ to_token=attr.astuple(to_token) if to_token else None,
+ engine=self.database_engine,
+ )
+
+ if direction == "b":
+ order = "DESC"
+ else:
+ order = "ASC"
+
+ if having_clause:
+ having_clause = "HAVING " + having_clause
+ else:
+ having_clause = ""
+
+ sql = """
+ SELECT type, aggregation_key, COUNT(DISTINCT sender), MAX(stream_ordering)
+ FROM event_relations
+ INNER JOIN events USING (event_id)
+ WHERE {where_clause}
+ GROUP BY relation_type, type, aggregation_key
+ {having_clause}
+ ORDER BY COUNT(*) {order}, MAX(stream_ordering) {order}
+ LIMIT ?
+ """.format(
+ where_clause=" AND ".join(where_clause),
+ order=order,
+ having_clause=having_clause,
+ )
+
+ def _get_aggregation_groups_for_event_txn(txn):
+ txn.execute(sql, where_args + [limit + 1])
+
+ next_batch = None
+ events = []
+ for row in txn:
+ events.append({"type": row[0], "key": row[1], "count": row[2]})
+ next_batch = AggregationPaginationToken(row[2], row[3])
+
+ if len(events) <= limit:
+ next_batch = None
+
+ return PaginationChunk(
+ chunk=list(events[:limit]), next_batch=next_batch, prev_batch=from_token
+ )
+
+ return self.runInteraction(
+ "get_aggregation_groups_for_event", _get_aggregation_groups_for_event_txn
+ )
+
+ @cachedInlineCallbacks()
+ def get_applicable_edit(self, event_id):
+ """Get the most recent edit (if any) that has happened for the given
+ event.
+
+ Correctly handles checking whether edits were allowed to happen.
+
+ Args:
+ event_id (str): The original event ID
+
+ Returns:
+ Deferred[EventBase|None]: Returns the most recent edit, if any.
+ """
+
+ # We only allow edits for `m.room.message` events that have the same sender
+ # and event type. We can't assert these things during regular event auth so
+ # we have to do the checks post hoc.
+
+ # Fetches latest edit that has the same type and sender as the
+ # original, and is an `m.room.message`.
+ sql = """
+ SELECT edit.event_id FROM events AS edit
+ INNER JOIN event_relations USING (event_id)
+ INNER JOIN events AS original ON
+ original.event_id = relates_to_id
+ AND edit.type = original.type
+ AND edit.sender = original.sender
+ WHERE
+ relates_to_id = ?
+ AND relation_type = ?
+ AND edit.type = 'm.room.message'
+ ORDER by edit.origin_server_ts DESC, edit.event_id DESC
+ LIMIT 1
+ """
+
+ def _get_applicable_edit_txn(txn):
+ txn.execute(sql, (event_id, RelationTypes.REPLACE))
+ row = txn.fetchone()
+ if row:
+ return row[0]
+
+ edit_id = yield self.runInteraction(
+ "get_applicable_edit", _get_applicable_edit_txn
+ )
+
+ if not edit_id:
+ return
+
+ edit_event = yield self.get_event(edit_id, allow_none=True)
+ return edit_event
+
+ def has_user_annotated_event(self, parent_id, event_type, aggregation_key, sender):
+ """Check if a user has already annotated an event with the same key
+ (e.g. already liked an event).
+
+ Args:
+ parent_id (str): The event being annotated
+ event_type (str): The event type of the annotation
+ aggregation_key (str): The aggregation key of the annotation
+ sender (str): The sender of the annotation
+
+ Returns:
+ Deferred[bool]
+ """
+
+ sql = """
+ SELECT 1 FROM event_relations
+ INNER JOIN events USING (event_id)
+ WHERE
+ relates_to_id = ?
+ AND relation_type = ?
+ AND type = ?
+ AND sender = ?
+ AND aggregation_key = ?
+ LIMIT 1;
+ """
+
+ def _get_if_user_has_annotated_event(txn):
+ txn.execute(
+ sql,
+ (
+ parent_id,
+ RelationTypes.ANNOTATION,
+ event_type,
+ sender,
+ aggregation_key,
+ ),
+ )
+
+ return bool(txn.fetchone())
+
+ return self.runInteraction(
+ "get_if_user_has_annotated_event", _get_if_user_has_annotated_event
+ )
+
+
+class RelationsStore(RelationsWorkerStore):
+ def _handle_event_relations(self, txn, event):
+ """Handles inserting relation data during peristence of events
+
+ Args:
+ txn
+ event (EventBase)
+ """
+ relation = event.content.get("m.relates_to")
+ if not relation:
+ # No relations
+ return
+
+ rel_type = relation.get("rel_type")
+ if rel_type not in (
+ RelationTypes.ANNOTATION,
+ RelationTypes.REFERENCE,
+ RelationTypes.REPLACE,
+ ):
+ # Unknown relation type
+ return
+
+ parent_id = relation.get("event_id")
+ if not parent_id:
+ # Invalid relation
+ return
+
+ aggregation_key = relation.get("key")
+
+ self._simple_insert_txn(
+ txn,
+ table="event_relations",
+ values={
+ "event_id": event.event_id,
+ "relates_to_id": parent_id,
+ "relation_type": rel_type,
+ "aggregation_key": aggregation_key,
+ },
+ )
+
+ txn.call_after(self.get_relations_for_event.invalidate_many, (parent_id,))
+ txn.call_after(
+ self.get_aggregation_groups_for_event.invalidate_many, (parent_id,)
+ )
+
+ if rel_type == RelationTypes.REPLACE:
+ txn.call_after(self.get_applicable_edit.invalidate, (parent_id,))
+
+ def _handle_redaction(self, txn, redacted_event_id):
+ """Handles receiving a redaction and checking whether we need to remove
+ any redacted relations from the database.
+
+ Args:
+ txn
+ redacted_event_id (str): The event that was redacted.
+ """
+
+ self._simple_delete_txn(
+ txn, table="event_relations", keyvalues={"event_id": redacted_event_id}
+ )
diff --git a/synapse/storage/room.py b/synapse/storage/data_stores/main/room.py
index 43cc56fa6f..4428e5c55d 100644
--- a/synapse/storage/room.py
+++ b/synapse/storage/data_stores/main/room.py
@@ -25,7 +25,7 @@ from twisted.internet import defer
from synapse.api.errors import StoreError
from synapse.storage._base import SQLBaseStore
-from synapse.storage.search import SearchStore
+from synapse.storage.data_stores.main.search import SearchStore
from synapse.types import ThirdPartyInstanceID
from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
diff --git a/synapse/storage/data_stores/main/roommember.py b/synapse/storage/data_stores/main/roommember.py
new file mode 100644
index 0000000000..e47ab604dd
--- /dev/null
+++ b/synapse/storage/data_stores/main/roommember.py
@@ -0,0 +1,1145 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from six import iteritems, itervalues
+
+from canonicaljson import json
+
+from twisted.internet import defer
+
+from synapse.api.constants import EventTypes, Membership
+from synapse.metrics import LaterGauge
+from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.storage._base import LoggingTransaction, make_in_list_sql_clause
+from synapse.storage.background_updates import BackgroundUpdateStore
+from synapse.storage.data_stores.main.events_worker import EventsWorkerStore
+from synapse.storage.engines import Sqlite3Engine
+from synapse.storage.roommember import (
+ GetRoomsForUserWithStreamOrdering,
+ MemberSummary,
+ ProfileInfo,
+ RoomsForUser,
+)
+from synapse.types import get_domain_from_id
+from synapse.util.async_helpers import Linearizer
+from synapse.util.caches import intern_string
+from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList
+from synapse.util.metrics import Measure
+from synapse.util.stringutils import to_ascii
+
+logger = logging.getLogger(__name__)
+
+
+_MEMBERSHIP_PROFILE_UPDATE_NAME = "room_membership_profile_update"
+_CURRENT_STATE_MEMBERSHIP_UPDATE_NAME = "current_state_events_membership"
+
+
+class RoomMemberWorkerStore(EventsWorkerStore):
+ def __init__(self, db_conn, hs):
+ super(RoomMemberWorkerStore, self).__init__(db_conn, hs)
+
+ # Is the current_state_events.membership up to date? Or is the
+ # background update still running?
+ self._current_state_events_membership_up_to_date = False
+
+ txn = LoggingTransaction(
+ db_conn.cursor(),
+ name="_check_safe_current_state_events_membership_updated",
+ database_engine=self.database_engine,
+ )
+ self._check_safe_current_state_events_membership_updated_txn(txn)
+ txn.close()
+
+ if self.hs.config.metrics_flags.known_servers:
+ self._known_servers_count = 1
+ self.hs.get_clock().looping_call(
+ run_as_background_process,
+ 60 * 1000,
+ "_count_known_servers",
+ self._count_known_servers,
+ )
+ self.hs.get_clock().call_later(
+ 1000,
+ run_as_background_process,
+ "_count_known_servers",
+ self._count_known_servers,
+ )
+ LaterGauge(
+ "synapse_federation_known_servers",
+ "",
+ [],
+ lambda: self._known_servers_count,
+ )
+
+ @defer.inlineCallbacks
+ def _count_known_servers(self):
+ """
+ Count the servers that this server knows about.
+
+ The statistic is stored on the class for the
+ `synapse_federation_known_servers` LaterGauge to collect.
+ """
+
+ def _transact(txn):
+ if isinstance(self.database_engine, Sqlite3Engine):
+ query = """
+ SELECT COUNT(DISTINCT substr(out.user_id, pos+1))
+ FROM (
+ SELECT rm.user_id as user_id, instr(rm.user_id, ':')
+ AS pos FROM room_memberships as rm
+ INNER JOIN current_state_events as c ON rm.event_id = c.event_id
+ WHERE c.type = 'm.room.member'
+ ) as out
+ """
+ else:
+ query = """
+ SELECT COUNT(DISTINCT split_part(state_key, ':', 2))
+ FROM current_state_events
+ WHERE type = 'm.room.member' AND membership = 'join';
+ """
+ txn.execute(query)
+ return list(txn)[0][0]
+
+ count = yield self.runInteraction("get_known_servers", _transact)
+
+ # We always know about ourselves, even if we have nothing in
+ # room_memberships (for example, the server is new).
+ self._known_servers_count = max([count, 1])
+ return self._known_servers_count
+
+ def _check_safe_current_state_events_membership_updated_txn(self, txn):
+ """Checks if it is safe to assume the new current_state_events
+ membership column is up to date
+ """
+
+ pending_update = self._simple_select_one_txn(
+ txn,
+ table="background_updates",
+ keyvalues={"update_name": _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME},
+ retcols=["update_name"],
+ allow_none=True,
+ )
+
+ self._current_state_events_membership_up_to_date = not pending_update
+
+ # If the update is still running, reschedule to run.
+ if pending_update:
+ self._clock.call_later(
+ 15.0,
+ run_as_background_process,
+ "_check_safe_current_state_events_membership_updated",
+ self.runInteraction,
+ "_check_safe_current_state_events_membership_updated",
+ self._check_safe_current_state_events_membership_updated_txn,
+ )
+
+ @cachedInlineCallbacks(max_entries=100000, iterable=True, cache_context=True)
+ def get_hosts_in_room(self, room_id, cache_context):
+ """Returns the set of all hosts currently in the room
+ """
+ user_ids = yield self.get_users_in_room(
+ room_id, on_invalidate=cache_context.invalidate
+ )
+ hosts = frozenset(get_domain_from_id(user_id) for user_id in user_ids)
+ return hosts
+
+ @cached(max_entries=100000, iterable=True)
+ def get_users_in_room(self, room_id):
+ return self.runInteraction(
+ "get_users_in_room", self.get_users_in_room_txn, room_id
+ )
+
+ def get_users_in_room_txn(self, txn, room_id):
+ # If we can assume current_state_events.membership is up to date
+ # then we can avoid a join, which is a Very Good Thing given how
+ # frequently this function gets called.
+ if self._current_state_events_membership_up_to_date:
+ sql = """
+ SELECT state_key FROM current_state_events
+ WHERE type = 'm.room.member' AND room_id = ? AND membership = ?
+ """
+ else:
+ sql = """
+ SELECT state_key FROM room_memberships as m
+ INNER JOIN current_state_events as c
+ ON m.event_id = c.event_id
+ AND m.room_id = c.room_id
+ AND m.user_id = c.state_key
+ WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ?
+ """
+
+ txn.execute(sql, (room_id, Membership.JOIN))
+ return [to_ascii(r[0]) for r in txn]
+
+ @cached(max_entries=100000)
+ def get_room_summary(self, room_id):
+ """ Get the details of a room roughly suitable for use by the room
+ summary extension to /sync. Useful when lazy loading room members.
+ Args:
+ room_id (str): The room ID to query
+ Returns:
+ Deferred[dict[str, MemberSummary]:
+ dict of membership states, pointing to a MemberSummary named tuple.
+ """
+
+ def _get_room_summary_txn(txn):
+ # first get counts.
+ # We do this all in one transaction to keep the cache small.
+ # FIXME: get rid of this when we have room_stats
+
+ # If we can assume current_state_events.membership is up to date
+ # then we can avoid a join, which is a Very Good Thing given how
+ # frequently this function gets called.
+ if self._current_state_events_membership_up_to_date:
+ # Note, rejected events will have a null membership field, so
+ # we we manually filter them out.
+ sql = """
+ SELECT count(*), membership FROM current_state_events
+ WHERE type = 'm.room.member' AND room_id = ?
+ AND membership IS NOT NULL
+ GROUP BY membership
+ """
+ else:
+ sql = """
+ SELECT count(*), m.membership FROM room_memberships as m
+ INNER JOIN current_state_events as c
+ ON m.event_id = c.event_id
+ AND m.room_id = c.room_id
+ AND m.user_id = c.state_key
+ WHERE c.type = 'm.room.member' AND c.room_id = ?
+ GROUP BY m.membership
+ """
+
+ txn.execute(sql, (room_id,))
+ res = {}
+ for count, membership in txn:
+ summary = res.setdefault(to_ascii(membership), MemberSummary([], count))
+
+ # we order by membership and then fairly arbitrarily by event_id so
+ # heroes are consistent
+ if self._current_state_events_membership_up_to_date:
+ # Note, rejected events will have a null membership field, so
+ # we we manually filter them out.
+ sql = """
+ SELECT state_key, membership, event_id
+ FROM current_state_events
+ WHERE type = 'm.room.member' AND room_id = ?
+ AND membership IS NOT NULL
+ ORDER BY
+ CASE membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
+ event_id ASC
+ LIMIT ?
+ """
+ else:
+ sql = """
+ SELECT c.state_key, m.membership, c.event_id
+ FROM room_memberships as m
+ INNER JOIN current_state_events as c USING (room_id, event_id)
+ WHERE c.type = 'm.room.member' AND c.room_id = ?
+ ORDER BY
+ CASE m.membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
+ c.event_id ASC
+ LIMIT ?
+ """
+
+ # 6 is 5 (number of heroes) plus 1, in case one of them is the calling user.
+ txn.execute(sql, (room_id, Membership.JOIN, Membership.INVITE, 6))
+ for user_id, membership, event_id in txn:
+ summary = res[to_ascii(membership)]
+ # we will always have a summary for this membership type at this
+ # point given the summary currently contains the counts.
+ members = summary.members
+ members.append((to_ascii(user_id), to_ascii(event_id)))
+
+ return res
+
+ return self.runInteraction("get_room_summary", _get_room_summary_txn)
+
+ def _get_user_counts_in_room_txn(self, txn, room_id):
+ """
+ Get the user count in a room by membership.
+
+ Args:
+ room_id (str)
+ membership (Membership)
+
+ Returns:
+ Deferred[int]
+ """
+ sql = """
+ SELECT m.membership, count(*) FROM room_memberships as m
+ INNER JOIN current_state_events as c USING(event_id)
+ WHERE c.type = 'm.room.member' AND c.room_id = ?
+ GROUP BY m.membership
+ """
+
+ txn.execute(sql, (room_id,))
+ return {row[0]: row[1] for row in txn}
+
+ @cached()
+ def get_invited_rooms_for_user(self, user_id):
+ """ Get all the rooms the user is invited to
+ Args:
+ user_id (str): The user ID.
+ Returns:
+ A deferred list of RoomsForUser.
+ """
+
+ return self.get_rooms_for_user_where_membership_is(user_id, [Membership.INVITE])
+
+ @defer.inlineCallbacks
+ def get_invite_for_user_in_room(self, user_id, room_id):
+ """Gets the invite for the given user and room
+
+ Args:
+ user_id (str)
+ room_id (str)
+
+ Returns:
+ Deferred: Resolves to either a RoomsForUser or None if no invite was
+ found.
+ """
+ invites = yield self.get_invited_rooms_for_user(user_id)
+ for invite in invites:
+ if invite.room_id == room_id:
+ return invite
+ return None
+
+ @defer.inlineCallbacks
+ def get_rooms_for_user_where_membership_is(self, user_id, membership_list):
+ """ Get all the rooms for this user where the membership for this user
+ matches one in the membership list.
+
+ Filters out forgotten rooms.
+
+ Args:
+ user_id (str): The user ID.
+ membership_list (list): A list of synapse.api.constants.Membership
+ values which the user must be in.
+
+ Returns:
+ Deferred[list[RoomsForUser]]
+ """
+ if not membership_list:
+ return defer.succeed(None)
+
+ rooms = yield self.runInteraction(
+ "get_rooms_for_user_where_membership_is",
+ self._get_rooms_for_user_where_membership_is_txn,
+ user_id,
+ membership_list,
+ )
+
+ # Now we filter out forgotten rooms
+ forgotten_rooms = yield self.get_forgotten_rooms_for_user(user_id)
+ return [room for room in rooms if room.room_id not in forgotten_rooms]
+
+ def _get_rooms_for_user_where_membership_is_txn(
+ self, txn, user_id, membership_list
+ ):
+
+ do_invite = Membership.INVITE in membership_list
+ membership_list = [m for m in membership_list if m != Membership.INVITE]
+
+ results = []
+ if membership_list:
+ if self._current_state_events_membership_up_to_date:
+ clause, args = make_in_list_sql_clause(
+ self.database_engine, "c.membership", membership_list
+ )
+ sql = """
+ SELECT room_id, e.sender, c.membership, event_id, e.stream_ordering
+ FROM current_state_events AS c
+ INNER JOIN events AS e USING (room_id, event_id)
+ WHERE
+ c.type = 'm.room.member'
+ AND state_key = ?
+ AND %s
+ """ % (
+ clause,
+ )
+ else:
+ clause, args = make_in_list_sql_clause(
+ self.database_engine, "m.membership", membership_list
+ )
+ sql = """
+ SELECT room_id, e.sender, m.membership, event_id, e.stream_ordering
+ FROM current_state_events AS c
+ INNER JOIN room_memberships AS m USING (room_id, event_id)
+ INNER JOIN events AS e USING (room_id, event_id)
+ WHERE
+ c.type = 'm.room.member'
+ AND state_key = ?
+ AND %s
+ """ % (
+ clause,
+ )
+
+ txn.execute(sql, (user_id, *args))
+ results = [RoomsForUser(**r) for r in self.cursor_to_dict(txn)]
+
+ if do_invite:
+ sql = (
+ "SELECT i.room_id, inviter, i.event_id, e.stream_ordering"
+ " FROM local_invites as i"
+ " INNER JOIN events as e USING (event_id)"
+ " WHERE invitee = ? AND locally_rejected is NULL"
+ " AND replaced_by is NULL"
+ )
+
+ txn.execute(sql, (user_id,))
+ results.extend(
+ RoomsForUser(
+ room_id=r["room_id"],
+ sender=r["inviter"],
+ event_id=r["event_id"],
+ stream_ordering=r["stream_ordering"],
+ membership=Membership.INVITE,
+ )
+ for r in self.cursor_to_dict(txn)
+ )
+
+ return results
+
+ @cachedInlineCallbacks(max_entries=500000, iterable=True)
+ def get_rooms_for_user_with_stream_ordering(self, user_id):
+ """Returns a set of room_ids the user is currently joined to
+
+ Args:
+ user_id (str)
+
+ Returns:
+ Deferred[frozenset[GetRoomsForUserWithStreamOrdering]]: Returns
+ the rooms the user is in currently, along with the stream ordering
+ of the most recent join for that user and room.
+ """
+ rooms = yield self.get_rooms_for_user_where_membership_is(
+ user_id, membership_list=[Membership.JOIN]
+ )
+ return frozenset(
+ GetRoomsForUserWithStreamOrdering(r.room_id, r.stream_ordering)
+ for r in rooms
+ )
+
+ @defer.inlineCallbacks
+ def get_rooms_for_user(self, user_id, on_invalidate=None):
+ """Returns a set of room_ids the user is currently joined to
+ """
+ rooms = yield self.get_rooms_for_user_with_stream_ordering(
+ user_id, on_invalidate=on_invalidate
+ )
+ return frozenset(r.room_id for r in rooms)
+
+ @cachedInlineCallbacks(max_entries=500000, cache_context=True, iterable=True)
+ def get_users_who_share_room_with_user(self, user_id, cache_context):
+ """Returns the set of users who share a room with `user_id`
+ """
+ room_ids = yield self.get_rooms_for_user(
+ user_id, on_invalidate=cache_context.invalidate
+ )
+
+ user_who_share_room = set()
+ for room_id in room_ids:
+ user_ids = yield self.get_users_in_room(
+ room_id, on_invalidate=cache_context.invalidate
+ )
+ user_who_share_room.update(user_ids)
+
+ return user_who_share_room
+
+ @defer.inlineCallbacks
+ def get_joined_users_from_context(self, event, context):
+ state_group = context.state_group
+ if not state_group:
+ # If state_group is None it means it has yet to be assigned a
+ # state group, i.e. we need to make sure that calls with a state_group
+ # of None don't hit previous cached calls with a None state_group.
+ # To do this we set the state_group to a new object as object() != object()
+ state_group = object()
+
+ current_state_ids = yield context.get_current_state_ids(self)
+ result = yield self._get_joined_users_from_context(
+ event.room_id, state_group, current_state_ids, event=event, context=context
+ )
+ return result
+
+ @defer.inlineCallbacks
+ def get_joined_users_from_state(self, room_id, state_entry):
+ state_group = state_entry.state_group
+ if not state_group:
+ # If state_group is None it means it has yet to be assigned a
+ # state group, i.e. we need to make sure that calls with a state_group
+ # of None don't hit previous cached calls with a None state_group.
+ # To do this we set the state_group to a new object as object() != object()
+ state_group = object()
+
+ with Measure(self._clock, "get_joined_users_from_state"):
+ return (
+ yield self._get_joined_users_from_context(
+ room_id, state_group, state_entry.state, context=state_entry
+ )
+ )
+
+ @cachedInlineCallbacks(
+ num_args=2, cache_context=True, iterable=True, max_entries=100000
+ )
+ def _get_joined_users_from_context(
+ self,
+ room_id,
+ state_group,
+ current_state_ids,
+ cache_context,
+ event=None,
+ context=None,
+ ):
+ # We don't use `state_group`, it's there so that we can cache based
+ # on it. However, it's important that it's never None, since two current_states
+ # with a state_group of None are likely to be different.
+ # See bulk_get_push_rules_for_room for how we work around this.
+ assert state_group is not None
+
+ users_in_room = {}
+ member_event_ids = [
+ e_id
+ for key, e_id in iteritems(current_state_ids)
+ if key[0] == EventTypes.Member
+ ]
+
+ if context is not None:
+ # If we have a context with a delta from a previous state group,
+ # check if we also have the result from the previous group in cache.
+ # If we do then we can reuse that result and simply update it with
+ # any membership changes in `delta_ids`
+ if context.prev_group and context.delta_ids:
+ prev_res = self._get_joined_users_from_context.cache.get(
+ (room_id, context.prev_group), None
+ )
+ if prev_res and isinstance(prev_res, dict):
+ users_in_room = dict(prev_res)
+ member_event_ids = [
+ e_id
+ for key, e_id in iteritems(context.delta_ids)
+ if key[0] == EventTypes.Member
+ ]
+ for etype, state_key in context.delta_ids:
+ users_in_room.pop(state_key, None)
+
+ # We check if we have any of the member event ids in the event cache
+ # before we ask the DB
+
+ # We don't update the event cache hit ratio as it completely throws off
+ # the hit ratio counts. After all, we don't populate the cache if we
+ # miss it here
+ event_map = self._get_events_from_cache(
+ member_event_ids, allow_rejected=False, update_metrics=False
+ )
+
+ missing_member_event_ids = []
+ for event_id in member_event_ids:
+ ev_entry = event_map.get(event_id)
+ if ev_entry:
+ if ev_entry.event.membership == Membership.JOIN:
+ users_in_room[to_ascii(ev_entry.event.state_key)] = ProfileInfo(
+ display_name=to_ascii(
+ ev_entry.event.content.get("displayname", None)
+ ),
+ avatar_url=to_ascii(
+ ev_entry.event.content.get("avatar_url", None)
+ ),
+ )
+ else:
+ missing_member_event_ids.append(event_id)
+
+ if missing_member_event_ids:
+ event_to_memberships = yield self._get_joined_profiles_from_event_ids(
+ missing_member_event_ids
+ )
+ users_in_room.update((row for row in event_to_memberships.values() if row))
+
+ if event is not None and event.type == EventTypes.Member:
+ if event.membership == Membership.JOIN:
+ if event.event_id in member_event_ids:
+ users_in_room[to_ascii(event.state_key)] = ProfileInfo(
+ display_name=to_ascii(event.content.get("displayname", None)),
+ avatar_url=to_ascii(event.content.get("avatar_url", None)),
+ )
+
+ return users_in_room
+
+ @cached(max_entries=10000)
+ def _get_joined_profile_from_event_id(self, event_id):
+ raise NotImplementedError()
+
+ @cachedList(
+ cached_method_name="_get_joined_profile_from_event_id",
+ list_name="event_ids",
+ inlineCallbacks=True,
+ )
+ def _get_joined_profiles_from_event_ids(self, event_ids):
+ """For given set of member event_ids check if they point to a join
+ event and if so return the associated user and profile info.
+
+ Args:
+ event_ids (Iterable[str]): The member event IDs to lookup
+
+ Returns:
+ Deferred[dict[str, Tuple[str, ProfileInfo]|None]]: Map from event ID
+ to `user_id` and ProfileInfo (or None if not join event).
+ """
+
+ rows = yield self._simple_select_many_batch(
+ table="room_memberships",
+ column="event_id",
+ iterable=event_ids,
+ retcols=("user_id", "display_name", "avatar_url", "event_id"),
+ keyvalues={"membership": Membership.JOIN},
+ batch_size=500,
+ desc="_get_membership_from_event_ids",
+ )
+
+ return {
+ row["event_id"]: (
+ row["user_id"],
+ ProfileInfo(
+ avatar_url=row["avatar_url"], display_name=row["display_name"]
+ ),
+ )
+ for row in rows
+ }
+
+ @cachedInlineCallbacks(max_entries=10000)
+ def is_host_joined(self, room_id, host):
+ if "%" in host or "_" in host:
+ raise Exception("Invalid host name")
+
+ sql = """
+ SELECT state_key FROM current_state_events AS c
+ INNER JOIN room_memberships AS m USING (event_id)
+ WHERE m.membership = 'join'
+ AND type = 'm.room.member'
+ AND c.room_id = ?
+ AND state_key LIKE ?
+ LIMIT 1
+ """
+
+ # We do need to be careful to ensure that host doesn't have any wild cards
+ # in it, but we checked above for known ones and we'll check below that
+ # the returned user actually has the correct domain.
+ like_clause = "%:" + host
+
+ rows = yield self._execute("is_host_joined", None, sql, room_id, like_clause)
+
+ if not rows:
+ return False
+
+ user_id = rows[0][0]
+ if get_domain_from_id(user_id) != host:
+ # This can only happen if the host name has something funky in it
+ raise Exception("Invalid host name")
+
+ return True
+
+ @cachedInlineCallbacks()
+ def was_host_joined(self, room_id, host):
+ """Check whether the server is or ever was in the room.
+
+ Args:
+ room_id (str)
+ host (str)
+
+ Returns:
+ Deferred: Resolves to True if the host is/was in the room, otherwise
+ False.
+ """
+ if "%" in host or "_" in host:
+ raise Exception("Invalid host name")
+
+ sql = """
+ SELECT user_id FROM room_memberships
+ WHERE room_id = ?
+ AND user_id LIKE ?
+ AND membership = 'join'
+ LIMIT 1
+ """
+
+ # We do need to be careful to ensure that host doesn't have any wild cards
+ # in it, but we checked above for known ones and we'll check below that
+ # the returned user actually has the correct domain.
+ like_clause = "%:" + host
+
+ rows = yield self._execute("was_host_joined", None, sql, room_id, like_clause)
+
+ if not rows:
+ return False
+
+ user_id = rows[0][0]
+ if get_domain_from_id(user_id) != host:
+ # This can only happen if the host name has something funky in it
+ raise Exception("Invalid host name")
+
+ return True
+
+ @defer.inlineCallbacks
+ def get_joined_hosts(self, room_id, state_entry):
+ state_group = state_entry.state_group
+ if not state_group:
+ # If state_group is None it means it has yet to be assigned a
+ # state group, i.e. we need to make sure that calls with a state_group
+ # of None don't hit previous cached calls with a None state_group.
+ # To do this we set the state_group to a new object as object() != object()
+ state_group = object()
+
+ with Measure(self._clock, "get_joined_hosts"):
+ return (
+ yield self._get_joined_hosts(
+ room_id, state_group, state_entry.state, state_entry=state_entry
+ )
+ )
+
+ @cachedInlineCallbacks(num_args=2, max_entries=10000, iterable=True)
+ # @defer.inlineCallbacks
+ def _get_joined_hosts(self, room_id, state_group, current_state_ids, state_entry):
+ # We don't use `state_group`, its there so that we can cache based
+ # on it. However, its important that its never None, since two current_state's
+ # with a state_group of None are likely to be different.
+ # See bulk_get_push_rules_for_room for how we work around this.
+ assert state_group is not None
+
+ cache = self._get_joined_hosts_cache(room_id)
+ joined_hosts = yield cache.get_destinations(state_entry)
+
+ return joined_hosts
+
+ @cached(max_entries=10000)
+ def _get_joined_hosts_cache(self, room_id):
+ return _JoinedHostsCache(self, room_id)
+
+ @cachedInlineCallbacks(num_args=2)
+ def did_forget(self, user_id, room_id):
+ """Returns whether user_id has elected to discard history for room_id.
+
+ Returns False if they have since re-joined."""
+
+ def f(txn):
+ sql = (
+ "SELECT"
+ " COUNT(*)"
+ " FROM"
+ " room_memberships"
+ " WHERE"
+ " user_id = ?"
+ " AND"
+ " room_id = ?"
+ " AND"
+ " forgotten = 0"
+ )
+ txn.execute(sql, (user_id, room_id))
+ rows = txn.fetchall()
+ return rows[0][0]
+
+ count = yield self.runInteraction("did_forget_membership", f)
+ return count == 0
+
+ @cached()
+ def get_forgotten_rooms_for_user(self, user_id):
+ """Gets all rooms the user has forgotten.
+
+ Args:
+ user_id (str)
+
+ Returns:
+ Deferred[set[str]]
+ """
+
+ def _get_forgotten_rooms_for_user_txn(txn):
+ # This is a slightly convoluted query that first looks up all rooms
+ # that the user has forgotten in the past, then rechecks that list
+ # to see if any have subsequently been updated. This is done so that
+ # we can use a partial index on `forgotten = 1` on the assumption
+ # that few users will actually forget many rooms.
+ #
+ # Note that a room is considered "forgotten" if *all* membership
+ # events for that user and room have the forgotten field set (as
+ # when a user forgets a room we update all rows for that user and
+ # room, not just the current one).
+ sql = """
+ SELECT room_id, (
+ SELECT count(*) FROM room_memberships
+ WHERE room_id = m.room_id AND user_id = m.user_id AND forgotten = 0
+ ) AS count
+ FROM room_memberships AS m
+ WHERE user_id = ? AND forgotten = 1
+ GROUP BY room_id, user_id;
+ """
+ txn.execute(sql, (user_id,))
+ return set(row[0] for row in txn if row[1] == 0)
+
+ return self.runInteraction(
+ "get_forgotten_rooms_for_user", _get_forgotten_rooms_for_user_txn
+ )
+
+ @defer.inlineCallbacks
+ def get_rooms_user_has_been_in(self, user_id):
+ """Get all rooms that the user has ever been in.
+
+ Args:
+ user_id (str)
+
+ Returns:
+ Deferred[set[str]]: Set of room IDs.
+ """
+
+ room_ids = yield self._simple_select_onecol(
+ table="room_memberships",
+ keyvalues={"membership": Membership.JOIN, "user_id": user_id},
+ retcol="room_id",
+ desc="get_rooms_user_has_been_in",
+ )
+
+ return set(room_ids)
+
+
+class RoomMemberBackgroundUpdateStore(BackgroundUpdateStore):
+ def __init__(self, db_conn, hs):
+ super(RoomMemberBackgroundUpdateStore, self).__init__(db_conn, hs)
+ self.register_background_update_handler(
+ _MEMBERSHIP_PROFILE_UPDATE_NAME, self._background_add_membership_profile
+ )
+ self.register_background_update_handler(
+ _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME,
+ self._background_current_state_membership,
+ )
+ self.register_background_index_update(
+ "room_membership_forgotten_idx",
+ index_name="room_memberships_user_room_forgotten",
+ table="room_memberships",
+ columns=["user_id", "room_id"],
+ where_clause="forgotten = 1",
+ )
+
+ @defer.inlineCallbacks
+ def _background_add_membership_profile(self, progress, batch_size):
+ target_min_stream_id = progress.get(
+ "target_min_stream_id_inclusive", self._min_stream_order_on_start
+ )
+ max_stream_id = progress.get(
+ "max_stream_id_exclusive", self._stream_order_on_start + 1
+ )
+
+ INSERT_CLUMP_SIZE = 1000
+
+ def add_membership_profile_txn(txn):
+ sql = """
+ SELECT stream_ordering, event_id, events.room_id, event_json.json
+ FROM events
+ INNER JOIN event_json USING (event_id)
+ INNER JOIN room_memberships USING (event_id)
+ WHERE ? <= stream_ordering AND stream_ordering < ?
+ AND type = 'm.room.member'
+ ORDER BY stream_ordering DESC
+ LIMIT ?
+ """
+
+ txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size))
+
+ rows = self.cursor_to_dict(txn)
+ if not rows:
+ return 0
+
+ min_stream_id = rows[-1]["stream_ordering"]
+
+ to_update = []
+ for row in rows:
+ event_id = row["event_id"]
+ room_id = row["room_id"]
+ try:
+ event_json = json.loads(row["json"])
+ content = event_json["content"]
+ except Exception:
+ continue
+
+ display_name = content.get("displayname", None)
+ avatar_url = content.get("avatar_url", None)
+
+ if display_name or avatar_url:
+ to_update.append((display_name, avatar_url, event_id, room_id))
+
+ to_update_sql = """
+ UPDATE room_memberships SET display_name = ?, avatar_url = ?
+ WHERE event_id = ? AND room_id = ?
+ """
+ for index in range(0, len(to_update), INSERT_CLUMP_SIZE):
+ clump = to_update[index : index + INSERT_CLUMP_SIZE]
+ txn.executemany(to_update_sql, clump)
+
+ progress = {
+ "target_min_stream_id_inclusive": target_min_stream_id,
+ "max_stream_id_exclusive": min_stream_id,
+ }
+
+ self._background_update_progress_txn(
+ txn, _MEMBERSHIP_PROFILE_UPDATE_NAME, progress
+ )
+
+ return len(rows)
+
+ result = yield self.runInteraction(
+ _MEMBERSHIP_PROFILE_UPDATE_NAME, add_membership_profile_txn
+ )
+
+ if not result:
+ yield self._end_background_update(_MEMBERSHIP_PROFILE_UPDATE_NAME)
+
+ return result
+
+ @defer.inlineCallbacks
+ def _background_current_state_membership(self, progress, batch_size):
+ """Update the new membership column on current_state_events.
+
+ This works by iterating over all rooms in alphebetical order.
+ """
+
+ def _background_current_state_membership_txn(txn, last_processed_room):
+ processed = 0
+ while processed < batch_size:
+ txn.execute(
+ """
+ SELECT MIN(room_id) FROM current_state_events WHERE room_id > ?
+ """,
+ (last_processed_room,),
+ )
+ row = txn.fetchone()
+ if not row or not row[0]:
+ return processed, True
+
+ next_room, = row
+
+ sql = """
+ UPDATE current_state_events
+ SET membership = (
+ SELECT membership FROM room_memberships
+ WHERE event_id = current_state_events.event_id
+ )
+ WHERE room_id = ?
+ """
+ txn.execute(sql, (next_room,))
+ processed += txn.rowcount
+
+ last_processed_room = next_room
+
+ self._background_update_progress_txn(
+ txn,
+ _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME,
+ {"last_processed_room": last_processed_room},
+ )
+
+ return processed, False
+
+ # If we haven't got a last processed room then just use the empty
+ # string, which will compare before all room IDs correctly.
+ last_processed_room = progress.get("last_processed_room", "")
+
+ row_count, finished = yield self.runInteraction(
+ "_background_current_state_membership_update",
+ _background_current_state_membership_txn,
+ last_processed_room,
+ )
+
+ if finished:
+ yield self._end_background_update(_CURRENT_STATE_MEMBERSHIP_UPDATE_NAME)
+
+ return row_count
+
+
+class RoomMemberStore(RoomMemberWorkerStore, RoomMemberBackgroundUpdateStore):
+ def __init__(self, db_conn, hs):
+ super(RoomMemberStore, self).__init__(db_conn, hs)
+
+ def _store_room_members_txn(self, txn, events, backfilled):
+ """Store a room member in the database.
+ """
+ self._simple_insert_many_txn(
+ txn,
+ table="room_memberships",
+ values=[
+ {
+ "event_id": event.event_id,
+ "user_id": event.state_key,
+ "sender": event.user_id,
+ "room_id": event.room_id,
+ "membership": event.membership,
+ "display_name": event.content.get("displayname", None),
+ "avatar_url": event.content.get("avatar_url", None),
+ }
+ for event in events
+ ],
+ )
+
+ for event in events:
+ txn.call_after(
+ self._membership_stream_cache.entity_has_changed,
+ event.state_key,
+ event.internal_metadata.stream_ordering,
+ )
+ txn.call_after(
+ self.get_invited_rooms_for_user.invalidate, (event.state_key,)
+ )
+
+ # We update the local_invites table only if the event is "current",
+ # i.e., its something that has just happened. If the event is an
+ # outlier it is only current if its an "out of band membership",
+ # like a remote invite or a rejection of a remote invite.
+ is_new_state = not backfilled and (
+ not event.internal_metadata.is_outlier()
+ or event.internal_metadata.is_out_of_band_membership()
+ )
+ is_mine = self.hs.is_mine_id(event.state_key)
+ if is_new_state and is_mine:
+ if event.membership == Membership.INVITE:
+ self._simple_insert_txn(
+ txn,
+ table="local_invites",
+ values={
+ "event_id": event.event_id,
+ "invitee": event.state_key,
+ "inviter": event.sender,
+ "room_id": event.room_id,
+ "stream_id": event.internal_metadata.stream_ordering,
+ },
+ )
+ else:
+ sql = (
+ "UPDATE local_invites SET stream_id = ?, replaced_by = ? WHERE"
+ " room_id = ? AND invitee = ? AND locally_rejected is NULL"
+ " AND replaced_by is NULL"
+ )
+
+ txn.execute(
+ sql,
+ (
+ event.internal_metadata.stream_ordering,
+ event.event_id,
+ event.room_id,
+ event.state_key,
+ ),
+ )
+
+ @defer.inlineCallbacks
+ def locally_reject_invite(self, user_id, room_id):
+ sql = (
+ "UPDATE local_invites SET stream_id = ?, locally_rejected = ? WHERE"
+ " room_id = ? AND invitee = ? AND locally_rejected is NULL"
+ " AND replaced_by is NULL"
+ )
+
+ def f(txn, stream_ordering):
+ txn.execute(sql, (stream_ordering, True, room_id, user_id))
+
+ with self._stream_id_gen.get_next() as stream_ordering:
+ yield self.runInteraction("locally_reject_invite", f, stream_ordering)
+
+ def forget(self, user_id, room_id):
+ """Indicate that user_id wishes to discard history for room_id."""
+
+ def f(txn):
+ sql = (
+ "UPDATE"
+ " room_memberships"
+ " SET"
+ " forgotten = 1"
+ " WHERE"
+ " user_id = ?"
+ " AND"
+ " room_id = ?"
+ )
+ txn.execute(sql, (user_id, room_id))
+
+ self._invalidate_cache_and_stream(txn, self.did_forget, (user_id, room_id))
+ self._invalidate_cache_and_stream(
+ txn, self.get_forgotten_rooms_for_user, (user_id,)
+ )
+
+ return self.runInteraction("forget_membership", f)
+
+
+class _JoinedHostsCache(object):
+ """Cache for joined hosts in a room that is optimised to handle updates
+ via state deltas.
+ """
+
+ def __init__(self, store, room_id):
+ self.store = store
+ self.room_id = room_id
+
+ self.hosts_to_joined_users = {}
+
+ self.state_group = object()
+
+ self.linearizer = Linearizer("_JoinedHostsCache")
+
+ self._len = 0
+
+ @defer.inlineCallbacks
+ def get_destinations(self, state_entry):
+ """Get set of destinations for a state entry
+
+ Args:
+ state_entry(synapse.state._StateCacheEntry)
+ """
+ if state_entry.state_group == self.state_group:
+ return frozenset(self.hosts_to_joined_users)
+
+ with (yield self.linearizer.queue(())):
+ if state_entry.state_group == self.state_group:
+ pass
+ elif state_entry.prev_group == self.state_group:
+ for (typ, state_key), event_id in iteritems(state_entry.delta_ids):
+ if typ != EventTypes.Member:
+ continue
+
+ host = intern_string(get_domain_from_id(state_key))
+ user_id = state_key
+ known_joins = self.hosts_to_joined_users.setdefault(host, set())
+
+ event = yield self.store.get_event(event_id)
+ if event.membership == Membership.JOIN:
+ known_joins.add(user_id)
+ else:
+ known_joins.discard(user_id)
+
+ if not known_joins:
+ self.hosts_to_joined_users.pop(host, None)
+ else:
+ joined_users = yield self.store.get_joined_users_from_state(
+ self.room_id, state_entry
+ )
+
+ self.hosts_to_joined_users = {}
+ for user_id in joined_users:
+ host = intern_string(get_domain_from_id(user_id))
+ self.hosts_to_joined_users.setdefault(host, set()).add(user_id)
+
+ if state_entry.state_group:
+ self.state_group = state_entry.state_group
+ else:
+ self.state_group = object()
+ self._len = sum(len(v) for v in itervalues(self.hosts_to_joined_users))
+ return frozenset(self.hosts_to_joined_users)
+
+ def __len__(self):
+ return self._len
diff --git a/synapse/storage/schema/delta/12/v12.sql b/synapse/storage/data_stores/main/schema/delta/12/v12.sql
index 5964c5aaac..5964c5aaac 100644
--- a/synapse/storage/schema/delta/12/v12.sql
+++ b/synapse/storage/data_stores/main/schema/delta/12/v12.sql
diff --git a/synapse/storage/schema/delta/13/v13.sql b/synapse/storage/data_stores/main/schema/delta/13/v13.sql
index f8649e5d99..f8649e5d99 100644
--- a/synapse/storage/schema/delta/13/v13.sql
+++ b/synapse/storage/data_stores/main/schema/delta/13/v13.sql
diff --git a/synapse/storage/schema/delta/14/v14.sql b/synapse/storage/data_stores/main/schema/delta/14/v14.sql
index a831920da6..a831920da6 100644
--- a/synapse/storage/schema/delta/14/v14.sql
+++ b/synapse/storage/data_stores/main/schema/delta/14/v14.sql
diff --git a/synapse/storage/schema/delta/15/appservice_txns.sql b/synapse/storage/data_stores/main/schema/delta/15/appservice_txns.sql
index e4f5e76aec..e4f5e76aec 100644
--- a/synapse/storage/schema/delta/15/appservice_txns.sql
+++ b/synapse/storage/data_stores/main/schema/delta/15/appservice_txns.sql
diff --git a/synapse/storage/schema/delta/15/presence_indices.sql b/synapse/storage/data_stores/main/schema/delta/15/presence_indices.sql
index 6b8d0f1ca7..6b8d0f1ca7 100644
--- a/synapse/storage/schema/delta/15/presence_indices.sql
+++ b/synapse/storage/data_stores/main/schema/delta/15/presence_indices.sql
diff --git a/synapse/storage/schema/delta/15/v15.sql b/synapse/storage/data_stores/main/schema/delta/15/v15.sql
index 9523d2bcc3..9523d2bcc3 100644
--- a/synapse/storage/schema/delta/15/v15.sql
+++ b/synapse/storage/data_stores/main/schema/delta/15/v15.sql
diff --git a/synapse/storage/schema/delta/16/events_order_index.sql b/synapse/storage/data_stores/main/schema/delta/16/events_order_index.sql
index a48f215170..a48f215170 100644
--- a/synapse/storage/schema/delta/16/events_order_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/16/events_order_index.sql
diff --git a/synapse/storage/schema/delta/16/remote_media_cache_index.sql b/synapse/storage/data_stores/main/schema/delta/16/remote_media_cache_index.sql
index 7a15265cb1..7a15265cb1 100644
--- a/synapse/storage/schema/delta/16/remote_media_cache_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/16/remote_media_cache_index.sql
diff --git a/synapse/storage/schema/delta/16/remove_duplicates.sql b/synapse/storage/data_stores/main/schema/delta/16/remove_duplicates.sql
index 65c97b5e2f..65c97b5e2f 100644
--- a/synapse/storage/schema/delta/16/remove_duplicates.sql
+++ b/synapse/storage/data_stores/main/schema/delta/16/remove_duplicates.sql
diff --git a/synapse/storage/schema/delta/16/room_alias_index.sql b/synapse/storage/data_stores/main/schema/delta/16/room_alias_index.sql
index f82486132b..f82486132b 100644
--- a/synapse/storage/schema/delta/16/room_alias_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/16/room_alias_index.sql
diff --git a/synapse/storage/schema/delta/16/unique_constraints.sql b/synapse/storage/data_stores/main/schema/delta/16/unique_constraints.sql
index 5b8de52c33..5b8de52c33 100644
--- a/synapse/storage/schema/delta/16/unique_constraints.sql
+++ b/synapse/storage/data_stores/main/schema/delta/16/unique_constraints.sql
diff --git a/synapse/storage/schema/delta/16/users.sql b/synapse/storage/data_stores/main/schema/delta/16/users.sql
index cd0709250d..cd0709250d 100644
--- a/synapse/storage/schema/delta/16/users.sql
+++ b/synapse/storage/data_stores/main/schema/delta/16/users.sql
diff --git a/synapse/storage/schema/delta/17/drop_indexes.sql b/synapse/storage/data_stores/main/schema/delta/17/drop_indexes.sql
index 7c9a90e27f..7c9a90e27f 100644
--- a/synapse/storage/schema/delta/17/drop_indexes.sql
+++ b/synapse/storage/data_stores/main/schema/delta/17/drop_indexes.sql
diff --git a/synapse/storage/schema/delta/17/server_keys.sql b/synapse/storage/data_stores/main/schema/delta/17/server_keys.sql
index 70b247a06b..70b247a06b 100644
--- a/synapse/storage/schema/delta/17/server_keys.sql
+++ b/synapse/storage/data_stores/main/schema/delta/17/server_keys.sql
diff --git a/synapse/storage/schema/delta/17/user_threepids.sql b/synapse/storage/data_stores/main/schema/delta/17/user_threepids.sql
index c17715ac80..c17715ac80 100644
--- a/synapse/storage/schema/delta/17/user_threepids.sql
+++ b/synapse/storage/data_stores/main/schema/delta/17/user_threepids.sql
diff --git a/synapse/storage/schema/delta/18/server_keys_bigger_ints.sql b/synapse/storage/data_stores/main/schema/delta/18/server_keys_bigger_ints.sql
index 6e0871c92b..6e0871c92b 100644
--- a/synapse/storage/schema/delta/18/server_keys_bigger_ints.sql
+++ b/synapse/storage/data_stores/main/schema/delta/18/server_keys_bigger_ints.sql
diff --git a/synapse/storage/schema/delta/19/event_index.sql b/synapse/storage/data_stores/main/schema/delta/19/event_index.sql
index 18b97b4332..18b97b4332 100644
--- a/synapse/storage/schema/delta/19/event_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/19/event_index.sql
diff --git a/synapse/storage/schema/delta/20/dummy.sql b/synapse/storage/data_stores/main/schema/delta/20/dummy.sql
index e0ac49d1ec..e0ac49d1ec 100644
--- a/synapse/storage/schema/delta/20/dummy.sql
+++ b/synapse/storage/data_stores/main/schema/delta/20/dummy.sql
diff --git a/synapse/storage/schema/delta/20/pushers.py b/synapse/storage/data_stores/main/schema/delta/20/pushers.py
index 3edfcfd783..3edfcfd783 100644
--- a/synapse/storage/schema/delta/20/pushers.py
+++ b/synapse/storage/data_stores/main/schema/delta/20/pushers.py
diff --git a/synapse/storage/schema/delta/21/end_to_end_keys.sql b/synapse/storage/data_stores/main/schema/delta/21/end_to_end_keys.sql
index 4c2fb20b77..4c2fb20b77 100644
--- a/synapse/storage/schema/delta/21/end_to_end_keys.sql
+++ b/synapse/storage/data_stores/main/schema/delta/21/end_to_end_keys.sql
diff --git a/synapse/storage/schema/delta/21/receipts.sql b/synapse/storage/data_stores/main/schema/delta/21/receipts.sql
index d070845477..d070845477 100644
--- a/synapse/storage/schema/delta/21/receipts.sql
+++ b/synapse/storage/data_stores/main/schema/delta/21/receipts.sql
diff --git a/synapse/storage/schema/delta/22/receipts_index.sql b/synapse/storage/data_stores/main/schema/delta/22/receipts_index.sql
index bfc0b3bcaa..bfc0b3bcaa 100644
--- a/synapse/storage/schema/delta/22/receipts_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/22/receipts_index.sql
diff --git a/synapse/storage/schema/delta/22/user_threepids_unique.sql b/synapse/storage/data_stores/main/schema/delta/22/user_threepids_unique.sql
index 87edfa454c..87edfa454c 100644
--- a/synapse/storage/schema/delta/22/user_threepids_unique.sql
+++ b/synapse/storage/data_stores/main/schema/delta/22/user_threepids_unique.sql
diff --git a/synapse/storage/schema/delta/23/drop_state_index.sql b/synapse/storage/data_stores/main/schema/delta/23/drop_state_index.sql
index ae09fa0065..ae09fa0065 100644
--- a/synapse/storage/schema/delta/23/drop_state_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/23/drop_state_index.sql
diff --git a/synapse/storage/schema/delta/24/stats_reporting.sql b/synapse/storage/data_stores/main/schema/delta/24/stats_reporting.sql
index acea7483bd..acea7483bd 100644
--- a/synapse/storage/schema/delta/24/stats_reporting.sql
+++ b/synapse/storage/data_stores/main/schema/delta/24/stats_reporting.sql
diff --git a/synapse/storage/data_stores/main/schema/delta/25/00background_updates.sql b/synapse/storage/data_stores/main/schema/delta/25/00background_updates.sql
new file mode 100644
index 0000000000..2ad9e8fa56
--- /dev/null
+++ b/synapse/storage/data_stores/main/schema/delta/25/00background_updates.sql
@@ -0,0 +1,21 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+CREATE TABLE IF NOT EXISTS background_updates(
+ update_name TEXT NOT NULL, -- The name of the background update.
+ progress_json TEXT NOT NULL, -- The current progress of the update as JSON.
+ CONSTRAINT background_updates_uniqueness UNIQUE (update_name)
+);
diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/data_stores/main/schema/delta/25/fts.py
index 4b2ffd35fd..4b2ffd35fd 100644
--- a/synapse/storage/schema/delta/25/fts.py
+++ b/synapse/storage/data_stores/main/schema/delta/25/fts.py
diff --git a/synapse/storage/schema/delta/25/guest_access.sql b/synapse/storage/data_stores/main/schema/delta/25/guest_access.sql
index 1ea389b471..1ea389b471 100644
--- a/synapse/storage/schema/delta/25/guest_access.sql
+++ b/synapse/storage/data_stores/main/schema/delta/25/guest_access.sql
diff --git a/synapse/storage/schema/delta/25/history_visibility.sql b/synapse/storage/data_stores/main/schema/delta/25/history_visibility.sql
index f468fc1897..f468fc1897 100644
--- a/synapse/storage/schema/delta/25/history_visibility.sql
+++ b/synapse/storage/data_stores/main/schema/delta/25/history_visibility.sql
diff --git a/synapse/storage/schema/delta/25/tags.sql b/synapse/storage/data_stores/main/schema/delta/25/tags.sql
index 7a32ce68e4..7a32ce68e4 100644
--- a/synapse/storage/schema/delta/25/tags.sql
+++ b/synapse/storage/data_stores/main/schema/delta/25/tags.sql
diff --git a/synapse/storage/schema/delta/26/account_data.sql b/synapse/storage/data_stores/main/schema/delta/26/account_data.sql
index e395de2b5e..e395de2b5e 100644
--- a/synapse/storage/schema/delta/26/account_data.sql
+++ b/synapse/storage/data_stores/main/schema/delta/26/account_data.sql
diff --git a/synapse/storage/schema/delta/27/account_data.sql b/synapse/storage/data_stores/main/schema/delta/27/account_data.sql
index bf0558b5b3..bf0558b5b3 100644
--- a/synapse/storage/schema/delta/27/account_data.sql
+++ b/synapse/storage/data_stores/main/schema/delta/27/account_data.sql
diff --git a/synapse/storage/schema/delta/27/forgotten_memberships.sql b/synapse/storage/data_stores/main/schema/delta/27/forgotten_memberships.sql
index e2094f37fe..e2094f37fe 100644
--- a/synapse/storage/schema/delta/27/forgotten_memberships.sql
+++ b/synapse/storage/data_stores/main/schema/delta/27/forgotten_memberships.sql
diff --git a/synapse/storage/schema/delta/27/ts.py b/synapse/storage/data_stores/main/schema/delta/27/ts.py
index 414f9f5aa0..414f9f5aa0 100644
--- a/synapse/storage/schema/delta/27/ts.py
+++ b/synapse/storage/data_stores/main/schema/delta/27/ts.py
diff --git a/synapse/storage/schema/delta/28/event_push_actions.sql b/synapse/storage/data_stores/main/schema/delta/28/event_push_actions.sql
index 4d519849df..4d519849df 100644
--- a/synapse/storage/schema/delta/28/event_push_actions.sql
+++ b/synapse/storage/data_stores/main/schema/delta/28/event_push_actions.sql
diff --git a/synapse/storage/schema/delta/28/events_room_stream.sql b/synapse/storage/data_stores/main/schema/delta/28/events_room_stream.sql
index 36609475f1..36609475f1 100644
--- a/synapse/storage/schema/delta/28/events_room_stream.sql
+++ b/synapse/storage/data_stores/main/schema/delta/28/events_room_stream.sql
diff --git a/synapse/storage/schema/delta/28/public_roms_index.sql b/synapse/storage/data_stores/main/schema/delta/28/public_roms_index.sql
index 6c1fd68c5b..6c1fd68c5b 100644
--- a/synapse/storage/schema/delta/28/public_roms_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/28/public_roms_index.sql
diff --git a/synapse/storage/schema/delta/28/receipts_user_id_index.sql b/synapse/storage/data_stores/main/schema/delta/28/receipts_user_id_index.sql
index cb84c69baa..cb84c69baa 100644
--- a/synapse/storage/schema/delta/28/receipts_user_id_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/28/receipts_user_id_index.sql
diff --git a/synapse/storage/schema/delta/28/upgrade_times.sql b/synapse/storage/data_stores/main/schema/delta/28/upgrade_times.sql
index 3e4a9ab455..3e4a9ab455 100644
--- a/synapse/storage/schema/delta/28/upgrade_times.sql
+++ b/synapse/storage/data_stores/main/schema/delta/28/upgrade_times.sql
diff --git a/synapse/storage/schema/delta/28/users_is_guest.sql b/synapse/storage/data_stores/main/schema/delta/28/users_is_guest.sql
index 21d2b420bf..21d2b420bf 100644
--- a/synapse/storage/schema/delta/28/users_is_guest.sql
+++ b/synapse/storage/data_stores/main/schema/delta/28/users_is_guest.sql
diff --git a/synapse/storage/schema/delta/29/push_actions.sql b/synapse/storage/data_stores/main/schema/delta/29/push_actions.sql
index 84b21cf813..84b21cf813 100644
--- a/synapse/storage/schema/delta/29/push_actions.sql
+++ b/synapse/storage/data_stores/main/schema/delta/29/push_actions.sql
diff --git a/synapse/storage/schema/delta/30/alias_creator.sql b/synapse/storage/data_stores/main/schema/delta/30/alias_creator.sql
index c9d0dde638..c9d0dde638 100644
--- a/synapse/storage/schema/delta/30/alias_creator.sql
+++ b/synapse/storage/data_stores/main/schema/delta/30/alias_creator.sql
diff --git a/synapse/storage/schema/delta/30/as_users.py b/synapse/storage/data_stores/main/schema/delta/30/as_users.py
index 9b95411fb6..9b95411fb6 100644
--- a/synapse/storage/schema/delta/30/as_users.py
+++ b/synapse/storage/data_stores/main/schema/delta/30/as_users.py
diff --git a/synapse/storage/schema/delta/30/deleted_pushers.sql b/synapse/storage/data_stores/main/schema/delta/30/deleted_pushers.sql
index 712c454aa1..712c454aa1 100644
--- a/synapse/storage/schema/delta/30/deleted_pushers.sql
+++ b/synapse/storage/data_stores/main/schema/delta/30/deleted_pushers.sql
diff --git a/synapse/storage/schema/delta/30/presence_stream.sql b/synapse/storage/data_stores/main/schema/delta/30/presence_stream.sql
index 606bbb037d..606bbb037d 100644
--- a/synapse/storage/schema/delta/30/presence_stream.sql
+++ b/synapse/storage/data_stores/main/schema/delta/30/presence_stream.sql
diff --git a/synapse/storage/schema/delta/30/public_rooms.sql b/synapse/storage/data_stores/main/schema/delta/30/public_rooms.sql
index f09db4faa6..f09db4faa6 100644
--- a/synapse/storage/schema/delta/30/public_rooms.sql
+++ b/synapse/storage/data_stores/main/schema/delta/30/public_rooms.sql
diff --git a/synapse/storage/schema/delta/30/push_rule_stream.sql b/synapse/storage/data_stores/main/schema/delta/30/push_rule_stream.sql
index 735aa8d5f6..735aa8d5f6 100644
--- a/synapse/storage/schema/delta/30/push_rule_stream.sql
+++ b/synapse/storage/data_stores/main/schema/delta/30/push_rule_stream.sql
diff --git a/synapse/storage/schema/delta/30/state_stream.sql b/synapse/storage/data_stores/main/schema/delta/30/state_stream.sql
index e85699e82e..e85699e82e 100644
--- a/synapse/storage/schema/delta/30/state_stream.sql
+++ b/synapse/storage/data_stores/main/schema/delta/30/state_stream.sql
diff --git a/synapse/storage/schema/delta/30/threepid_guest_access_tokens.sql b/synapse/storage/data_stores/main/schema/delta/30/threepid_guest_access_tokens.sql
index 0dd2f1360c..0dd2f1360c 100644
--- a/synapse/storage/schema/delta/30/threepid_guest_access_tokens.sql
+++ b/synapse/storage/data_stores/main/schema/delta/30/threepid_guest_access_tokens.sql
diff --git a/synapse/storage/schema/delta/31/invites.sql b/synapse/storage/data_stores/main/schema/delta/31/invites.sql
index 2c57846d5a..2c57846d5a 100644
--- a/synapse/storage/schema/delta/31/invites.sql
+++ b/synapse/storage/data_stores/main/schema/delta/31/invites.sql
diff --git a/synapse/storage/schema/delta/31/local_media_repository_url_cache.sql b/synapse/storage/data_stores/main/schema/delta/31/local_media_repository_url_cache.sql
index 9efb4280eb..9efb4280eb 100644
--- a/synapse/storage/schema/delta/31/local_media_repository_url_cache.sql
+++ b/synapse/storage/data_stores/main/schema/delta/31/local_media_repository_url_cache.sql
diff --git a/synapse/storage/schema/delta/31/pushers.py b/synapse/storage/data_stores/main/schema/delta/31/pushers.py
index 9bb504aad5..9bb504aad5 100644
--- a/synapse/storage/schema/delta/31/pushers.py
+++ b/synapse/storage/data_stores/main/schema/delta/31/pushers.py
diff --git a/synapse/storage/schema/delta/31/pushers_index.sql b/synapse/storage/data_stores/main/schema/delta/31/pushers_index.sql
index a82add88fd..a82add88fd 100644
--- a/synapse/storage/schema/delta/31/pushers_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/31/pushers_index.sql
diff --git a/synapse/storage/schema/delta/31/search_update.py b/synapse/storage/data_stores/main/schema/delta/31/search_update.py
index 7d8ca5f93f..7d8ca5f93f 100644
--- a/synapse/storage/schema/delta/31/search_update.py
+++ b/synapse/storage/data_stores/main/schema/delta/31/search_update.py
diff --git a/synapse/storage/schema/delta/32/events.sql b/synapse/storage/data_stores/main/schema/delta/32/events.sql
index 1dd0f9e170..1dd0f9e170 100644
--- a/synapse/storage/schema/delta/32/events.sql
+++ b/synapse/storage/data_stores/main/schema/delta/32/events.sql
diff --git a/synapse/storage/schema/delta/32/openid.sql b/synapse/storage/data_stores/main/schema/delta/32/openid.sql
index 36f37b11c8..36f37b11c8 100644
--- a/synapse/storage/schema/delta/32/openid.sql
+++ b/synapse/storage/data_stores/main/schema/delta/32/openid.sql
diff --git a/synapse/storage/schema/delta/32/pusher_throttle.sql b/synapse/storage/data_stores/main/schema/delta/32/pusher_throttle.sql
index d86d30c13c..d86d30c13c 100644
--- a/synapse/storage/schema/delta/32/pusher_throttle.sql
+++ b/synapse/storage/data_stores/main/schema/delta/32/pusher_throttle.sql
diff --git a/synapse/storage/schema/delta/32/remove_indices.sql b/synapse/storage/data_stores/main/schema/delta/32/remove_indices.sql
index 4219cdd06a..4219cdd06a 100644
--- a/synapse/storage/schema/delta/32/remove_indices.sql
+++ b/synapse/storage/data_stores/main/schema/delta/32/remove_indices.sql
diff --git a/synapse/storage/schema/delta/32/reports.sql b/synapse/storage/data_stores/main/schema/delta/32/reports.sql
index d13609776f..d13609776f 100644
--- a/synapse/storage/schema/delta/32/reports.sql
+++ b/synapse/storage/data_stores/main/schema/delta/32/reports.sql
diff --git a/synapse/storage/schema/delta/33/access_tokens_device_index.sql b/synapse/storage/data_stores/main/schema/delta/33/access_tokens_device_index.sql
index 61ad3fe3e8..61ad3fe3e8 100644
--- a/synapse/storage/schema/delta/33/access_tokens_device_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/33/access_tokens_device_index.sql
diff --git a/synapse/storage/schema/delta/33/devices.sql b/synapse/storage/data_stores/main/schema/delta/33/devices.sql
index eca7268d82..eca7268d82 100644
--- a/synapse/storage/schema/delta/33/devices.sql
+++ b/synapse/storage/data_stores/main/schema/delta/33/devices.sql
diff --git a/synapse/storage/schema/delta/33/devices_for_e2e_keys.sql b/synapse/storage/data_stores/main/schema/delta/33/devices_for_e2e_keys.sql
index aa4a3b9f2f..aa4a3b9f2f 100644
--- a/synapse/storage/schema/delta/33/devices_for_e2e_keys.sql
+++ b/synapse/storage/data_stores/main/schema/delta/33/devices_for_e2e_keys.sql
diff --git a/synapse/storage/schema/delta/33/devices_for_e2e_keys_clear_unknown_device.sql b/synapse/storage/data_stores/main/schema/delta/33/devices_for_e2e_keys_clear_unknown_device.sql
index 6671573398..6671573398 100644
--- a/synapse/storage/schema/delta/33/devices_for_e2e_keys_clear_unknown_device.sql
+++ b/synapse/storage/data_stores/main/schema/delta/33/devices_for_e2e_keys_clear_unknown_device.sql
diff --git a/synapse/storage/schema/delta/33/event_fields.py b/synapse/storage/data_stores/main/schema/delta/33/event_fields.py
index bff1256a7b..bff1256a7b 100644
--- a/synapse/storage/schema/delta/33/event_fields.py
+++ b/synapse/storage/data_stores/main/schema/delta/33/event_fields.py
diff --git a/synapse/storage/schema/delta/33/remote_media_ts.py b/synapse/storage/data_stores/main/schema/delta/33/remote_media_ts.py
index a26057dfb6..a26057dfb6 100644
--- a/synapse/storage/schema/delta/33/remote_media_ts.py
+++ b/synapse/storage/data_stores/main/schema/delta/33/remote_media_ts.py
diff --git a/synapse/storage/schema/delta/33/user_ips_index.sql b/synapse/storage/data_stores/main/schema/delta/33/user_ips_index.sql
index 473f75a78e..473f75a78e 100644
--- a/synapse/storage/schema/delta/33/user_ips_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/33/user_ips_index.sql
diff --git a/synapse/storage/schema/delta/34/appservice_stream.sql b/synapse/storage/data_stores/main/schema/delta/34/appservice_stream.sql
index 69e16eda0f..69e16eda0f 100644
--- a/synapse/storage/schema/delta/34/appservice_stream.sql
+++ b/synapse/storage/data_stores/main/schema/delta/34/appservice_stream.sql
diff --git a/synapse/storage/schema/delta/34/cache_stream.py b/synapse/storage/data_stores/main/schema/delta/34/cache_stream.py
index cf09e43e2b..cf09e43e2b 100644
--- a/synapse/storage/schema/delta/34/cache_stream.py
+++ b/synapse/storage/data_stores/main/schema/delta/34/cache_stream.py
diff --git a/synapse/storage/schema/delta/34/device_inbox.sql b/synapse/storage/data_stores/main/schema/delta/34/device_inbox.sql
index e68844c74a..e68844c74a 100644
--- a/synapse/storage/schema/delta/34/device_inbox.sql
+++ b/synapse/storage/data_stores/main/schema/delta/34/device_inbox.sql
diff --git a/synapse/storage/schema/delta/34/push_display_name_rename.sql b/synapse/storage/data_stores/main/schema/delta/34/push_display_name_rename.sql
index 0d9fe1a99a..0d9fe1a99a 100644
--- a/synapse/storage/schema/delta/34/push_display_name_rename.sql
+++ b/synapse/storage/data_stores/main/schema/delta/34/push_display_name_rename.sql
diff --git a/synapse/storage/schema/delta/34/received_txn_purge.py b/synapse/storage/data_stores/main/schema/delta/34/received_txn_purge.py
index 67d505e68b..67d505e68b 100644
--- a/synapse/storage/schema/delta/34/received_txn_purge.py
+++ b/synapse/storage/data_stores/main/schema/delta/34/received_txn_purge.py
diff --git a/synapse/storage/schema/delta/35/add_state_index.sql b/synapse/storage/data_stores/main/schema/delta/35/add_state_index.sql
index 0fce26345b..33980d02f0 100644
--- a/synapse/storage/schema/delta/35/add_state_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/35/add_state_index.sql
@@ -13,8 +13,5 @@
* limitations under the License.
*/
-
-ALTER TABLE background_updates ADD COLUMN depends_on TEXT;
-
INSERT into background_updates (update_name, progress_json, depends_on)
VALUES ('state_group_state_type_index', '{}', 'state_group_state_deduplication');
diff --git a/synapse/storage/schema/delta/35/contains_url.sql b/synapse/storage/data_stores/main/schema/delta/35/contains_url.sql
index 6cd123027b..6cd123027b 100644
--- a/synapse/storage/schema/delta/35/contains_url.sql
+++ b/synapse/storage/data_stores/main/schema/delta/35/contains_url.sql
diff --git a/synapse/storage/schema/delta/35/device_outbox.sql b/synapse/storage/data_stores/main/schema/delta/35/device_outbox.sql
index 17e6c43105..17e6c43105 100644
--- a/synapse/storage/schema/delta/35/device_outbox.sql
+++ b/synapse/storage/data_stores/main/schema/delta/35/device_outbox.sql
diff --git a/synapse/storage/schema/delta/35/device_stream_id.sql b/synapse/storage/data_stores/main/schema/delta/35/device_stream_id.sql
index 7ab7d942e2..7ab7d942e2 100644
--- a/synapse/storage/schema/delta/35/device_stream_id.sql
+++ b/synapse/storage/data_stores/main/schema/delta/35/device_stream_id.sql
diff --git a/synapse/storage/schema/delta/35/event_push_actions_index.sql b/synapse/storage/data_stores/main/schema/delta/35/event_push_actions_index.sql
index 2e836d8e9c..2e836d8e9c 100644
--- a/synapse/storage/schema/delta/35/event_push_actions_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/35/event_push_actions_index.sql
diff --git a/synapse/storage/schema/delta/35/public_room_list_change_stream.sql b/synapse/storage/data_stores/main/schema/delta/35/public_room_list_change_stream.sql
index dd2bf2e28a..dd2bf2e28a 100644
--- a/synapse/storage/schema/delta/35/public_room_list_change_stream.sql
+++ b/synapse/storage/data_stores/main/schema/delta/35/public_room_list_change_stream.sql
diff --git a/synapse/storage/schema/delta/35/state.sql b/synapse/storage/data_stores/main/schema/delta/35/state.sql
index 0f1fa68a89..0f1fa68a89 100644
--- a/synapse/storage/schema/delta/35/state.sql
+++ b/synapse/storage/data_stores/main/schema/delta/35/state.sql
diff --git a/synapse/storage/schema/delta/35/state_dedupe.sql b/synapse/storage/data_stores/main/schema/delta/35/state_dedupe.sql
index 97e5067ef4..97e5067ef4 100644
--- a/synapse/storage/schema/delta/35/state_dedupe.sql
+++ b/synapse/storage/data_stores/main/schema/delta/35/state_dedupe.sql
diff --git a/synapse/storage/schema/delta/35/stream_order_to_extrem.sql b/synapse/storage/data_stores/main/schema/delta/35/stream_order_to_extrem.sql
index 2b945d8a57..2b945d8a57 100644
--- a/synapse/storage/schema/delta/35/stream_order_to_extrem.sql
+++ b/synapse/storage/data_stores/main/schema/delta/35/stream_order_to_extrem.sql
diff --git a/synapse/storage/schema/delta/36/readd_public_rooms.sql b/synapse/storage/data_stores/main/schema/delta/36/readd_public_rooms.sql
index 90d8fd18f9..90d8fd18f9 100644
--- a/synapse/storage/schema/delta/36/readd_public_rooms.sql
+++ b/synapse/storage/data_stores/main/schema/delta/36/readd_public_rooms.sql
diff --git a/synapse/storage/schema/delta/37/remove_auth_idx.py b/synapse/storage/data_stores/main/schema/delta/37/remove_auth_idx.py
index a377884169..a377884169 100644
--- a/synapse/storage/schema/delta/37/remove_auth_idx.py
+++ b/synapse/storage/data_stores/main/schema/delta/37/remove_auth_idx.py
diff --git a/synapse/storage/schema/delta/37/user_threepids.sql b/synapse/storage/data_stores/main/schema/delta/37/user_threepids.sql
index cf7a90dd10..cf7a90dd10 100644
--- a/synapse/storage/schema/delta/37/user_threepids.sql
+++ b/synapse/storage/data_stores/main/schema/delta/37/user_threepids.sql
diff --git a/synapse/storage/schema/delta/38/postgres_fts_gist.sql b/synapse/storage/data_stores/main/schema/delta/38/postgres_fts_gist.sql
index 515e6b8e84..515e6b8e84 100644
--- a/synapse/storage/schema/delta/38/postgres_fts_gist.sql
+++ b/synapse/storage/data_stores/main/schema/delta/38/postgres_fts_gist.sql
diff --git a/synapse/storage/schema/delta/39/appservice_room_list.sql b/synapse/storage/data_stores/main/schema/delta/39/appservice_room_list.sql
index 74bdc49073..74bdc49073 100644
--- a/synapse/storage/schema/delta/39/appservice_room_list.sql
+++ b/synapse/storage/data_stores/main/schema/delta/39/appservice_room_list.sql
diff --git a/synapse/storage/schema/delta/39/device_federation_stream_idx.sql b/synapse/storage/data_stores/main/schema/delta/39/device_federation_stream_idx.sql
index 00be801e90..00be801e90 100644
--- a/synapse/storage/schema/delta/39/device_federation_stream_idx.sql
+++ b/synapse/storage/data_stores/main/schema/delta/39/device_federation_stream_idx.sql
diff --git a/synapse/storage/schema/delta/39/event_push_index.sql b/synapse/storage/data_stores/main/schema/delta/39/event_push_index.sql
index de2ad93e5c..de2ad93e5c 100644
--- a/synapse/storage/schema/delta/39/event_push_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/39/event_push_index.sql
diff --git a/synapse/storage/schema/delta/39/federation_out_position.sql b/synapse/storage/data_stores/main/schema/delta/39/federation_out_position.sql
index 5af814290b..5af814290b 100644
--- a/synapse/storage/schema/delta/39/federation_out_position.sql
+++ b/synapse/storage/data_stores/main/schema/delta/39/federation_out_position.sql
diff --git a/synapse/storage/schema/delta/39/membership_profile.sql b/synapse/storage/data_stores/main/schema/delta/39/membership_profile.sql
index 1bf911c8ab..1bf911c8ab 100644
--- a/synapse/storage/schema/delta/39/membership_profile.sql
+++ b/synapse/storage/data_stores/main/schema/delta/39/membership_profile.sql
diff --git a/synapse/storage/schema/delta/40/current_state_idx.sql b/synapse/storage/data_stores/main/schema/delta/40/current_state_idx.sql
index 7ffa189f39..7ffa189f39 100644
--- a/synapse/storage/schema/delta/40/current_state_idx.sql
+++ b/synapse/storage/data_stores/main/schema/delta/40/current_state_idx.sql
diff --git a/synapse/storage/schema/delta/40/device_inbox.sql b/synapse/storage/data_stores/main/schema/delta/40/device_inbox.sql
index b9fe1f0480..b9fe1f0480 100644
--- a/synapse/storage/schema/delta/40/device_inbox.sql
+++ b/synapse/storage/data_stores/main/schema/delta/40/device_inbox.sql
diff --git a/synapse/storage/schema/delta/40/device_list_streams.sql b/synapse/storage/data_stores/main/schema/delta/40/device_list_streams.sql
index dd6dcb65f1..dd6dcb65f1 100644
--- a/synapse/storage/schema/delta/40/device_list_streams.sql
+++ b/synapse/storage/data_stores/main/schema/delta/40/device_list_streams.sql
diff --git a/synapse/storage/schema/delta/40/event_push_summary.sql b/synapse/storage/data_stores/main/schema/delta/40/event_push_summary.sql
index 3918f0b794..3918f0b794 100644
--- a/synapse/storage/schema/delta/40/event_push_summary.sql
+++ b/synapse/storage/data_stores/main/schema/delta/40/event_push_summary.sql
diff --git a/synapse/storage/schema/delta/40/pushers.sql b/synapse/storage/data_stores/main/schema/delta/40/pushers.sql
index 054a223f14..054a223f14 100644
--- a/synapse/storage/schema/delta/40/pushers.sql
+++ b/synapse/storage/data_stores/main/schema/delta/40/pushers.sql
diff --git a/synapse/storage/schema/delta/41/device_list_stream_idx.sql b/synapse/storage/data_stores/main/schema/delta/41/device_list_stream_idx.sql
index b7bee8b692..b7bee8b692 100644
--- a/synapse/storage/schema/delta/41/device_list_stream_idx.sql
+++ b/synapse/storage/data_stores/main/schema/delta/41/device_list_stream_idx.sql
diff --git a/synapse/storage/schema/delta/41/device_outbound_index.sql b/synapse/storage/data_stores/main/schema/delta/41/device_outbound_index.sql
index 62f0b9892b..62f0b9892b 100644
--- a/synapse/storage/schema/delta/41/device_outbound_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/41/device_outbound_index.sql
diff --git a/synapse/storage/schema/delta/41/event_search_event_id_idx.sql b/synapse/storage/data_stores/main/schema/delta/41/event_search_event_id_idx.sql
index 5d9cfecf36..5d9cfecf36 100644
--- a/synapse/storage/schema/delta/41/event_search_event_id_idx.sql
+++ b/synapse/storage/data_stores/main/schema/delta/41/event_search_event_id_idx.sql
diff --git a/synapse/storage/schema/delta/41/ratelimit.sql b/synapse/storage/data_stores/main/schema/delta/41/ratelimit.sql
index a194bf0238..a194bf0238 100644
--- a/synapse/storage/schema/delta/41/ratelimit.sql
+++ b/synapse/storage/data_stores/main/schema/delta/41/ratelimit.sql
diff --git a/synapse/storage/schema/delta/42/current_state_delta.sql b/synapse/storage/data_stores/main/schema/delta/42/current_state_delta.sql
index d28851aff8..d28851aff8 100644
--- a/synapse/storage/schema/delta/42/current_state_delta.sql
+++ b/synapse/storage/data_stores/main/schema/delta/42/current_state_delta.sql
diff --git a/synapse/storage/schema/delta/42/device_list_last_id.sql b/synapse/storage/data_stores/main/schema/delta/42/device_list_last_id.sql
index 9ab8c14fa3..9ab8c14fa3 100644
--- a/synapse/storage/schema/delta/42/device_list_last_id.sql
+++ b/synapse/storage/data_stores/main/schema/delta/42/device_list_last_id.sql
diff --git a/synapse/storage/schema/delta/42/event_auth_state_only.sql b/synapse/storage/data_stores/main/schema/delta/42/event_auth_state_only.sql
index b8821ac759..b8821ac759 100644
--- a/synapse/storage/schema/delta/42/event_auth_state_only.sql
+++ b/synapse/storage/data_stores/main/schema/delta/42/event_auth_state_only.sql
diff --git a/synapse/storage/schema/delta/42/user_dir.py b/synapse/storage/data_stores/main/schema/delta/42/user_dir.py
index 506f326f4d..506f326f4d 100644
--- a/synapse/storage/schema/delta/42/user_dir.py
+++ b/synapse/storage/data_stores/main/schema/delta/42/user_dir.py
diff --git a/synapse/storage/schema/delta/43/blocked_rooms.sql b/synapse/storage/data_stores/main/schema/delta/43/blocked_rooms.sql
index 0e3cd143ff..0e3cd143ff 100644
--- a/synapse/storage/schema/delta/43/blocked_rooms.sql
+++ b/synapse/storage/data_stores/main/schema/delta/43/blocked_rooms.sql
diff --git a/synapse/storage/schema/delta/43/quarantine_media.sql b/synapse/storage/data_stores/main/schema/delta/43/quarantine_media.sql
index 630907ec4f..630907ec4f 100644
--- a/synapse/storage/schema/delta/43/quarantine_media.sql
+++ b/synapse/storage/data_stores/main/schema/delta/43/quarantine_media.sql
diff --git a/synapse/storage/schema/delta/43/url_cache.sql b/synapse/storage/data_stores/main/schema/delta/43/url_cache.sql
index 45ebe020da..45ebe020da 100644
--- a/synapse/storage/schema/delta/43/url_cache.sql
+++ b/synapse/storage/data_stores/main/schema/delta/43/url_cache.sql
diff --git a/synapse/storage/schema/delta/43/user_share.sql b/synapse/storage/data_stores/main/schema/delta/43/user_share.sql
index ee7062abe4..ee7062abe4 100644
--- a/synapse/storage/schema/delta/43/user_share.sql
+++ b/synapse/storage/data_stores/main/schema/delta/43/user_share.sql
diff --git a/synapse/storage/schema/delta/44/expire_url_cache.sql b/synapse/storage/data_stores/main/schema/delta/44/expire_url_cache.sql
index b12f9b2ebf..b12f9b2ebf 100644
--- a/synapse/storage/schema/delta/44/expire_url_cache.sql
+++ b/synapse/storage/data_stores/main/schema/delta/44/expire_url_cache.sql
diff --git a/synapse/storage/schema/delta/45/group_server.sql b/synapse/storage/data_stores/main/schema/delta/45/group_server.sql
index b2333848a0..b2333848a0 100644
--- a/synapse/storage/schema/delta/45/group_server.sql
+++ b/synapse/storage/data_stores/main/schema/delta/45/group_server.sql
diff --git a/synapse/storage/schema/delta/45/profile_cache.sql b/synapse/storage/data_stores/main/schema/delta/45/profile_cache.sql
index e5ddc84df0..e5ddc84df0 100644
--- a/synapse/storage/schema/delta/45/profile_cache.sql
+++ b/synapse/storage/data_stores/main/schema/delta/45/profile_cache.sql
diff --git a/synapse/storage/schema/delta/46/drop_refresh_tokens.sql b/synapse/storage/data_stores/main/schema/delta/46/drop_refresh_tokens.sql
index 68c48a89a9..68c48a89a9 100644
--- a/synapse/storage/schema/delta/46/drop_refresh_tokens.sql
+++ b/synapse/storage/data_stores/main/schema/delta/46/drop_refresh_tokens.sql
diff --git a/synapse/storage/schema/delta/46/drop_unique_deleted_pushers.sql b/synapse/storage/data_stores/main/schema/delta/46/drop_unique_deleted_pushers.sql
index bb307889c1..bb307889c1 100644
--- a/synapse/storage/schema/delta/46/drop_unique_deleted_pushers.sql
+++ b/synapse/storage/data_stores/main/schema/delta/46/drop_unique_deleted_pushers.sql
diff --git a/synapse/storage/schema/delta/46/group_server.sql b/synapse/storage/data_stores/main/schema/delta/46/group_server.sql
index 097679bc9a..097679bc9a 100644
--- a/synapse/storage/schema/delta/46/group_server.sql
+++ b/synapse/storage/data_stores/main/schema/delta/46/group_server.sql
diff --git a/synapse/storage/schema/delta/46/local_media_repository_url_idx.sql b/synapse/storage/data_stores/main/schema/delta/46/local_media_repository_url_idx.sql
index bbfc7f5d1a..bbfc7f5d1a 100644
--- a/synapse/storage/schema/delta/46/local_media_repository_url_idx.sql
+++ b/synapse/storage/data_stores/main/schema/delta/46/local_media_repository_url_idx.sql
diff --git a/synapse/storage/schema/delta/46/user_dir_null_room_ids.sql b/synapse/storage/data_stores/main/schema/delta/46/user_dir_null_room_ids.sql
index cb0d5a2576..cb0d5a2576 100644
--- a/synapse/storage/schema/delta/46/user_dir_null_room_ids.sql
+++ b/synapse/storage/data_stores/main/schema/delta/46/user_dir_null_room_ids.sql
diff --git a/synapse/storage/schema/delta/46/user_dir_typos.sql b/synapse/storage/data_stores/main/schema/delta/46/user_dir_typos.sql
index d9505f8da1..d9505f8da1 100644
--- a/synapse/storage/schema/delta/46/user_dir_typos.sql
+++ b/synapse/storage/data_stores/main/schema/delta/46/user_dir_typos.sql
diff --git a/synapse/storage/schema/delta/47/last_access_media.sql b/synapse/storage/data_stores/main/schema/delta/47/last_access_media.sql
index f505fb22b5..f505fb22b5 100644
--- a/synapse/storage/schema/delta/47/last_access_media.sql
+++ b/synapse/storage/data_stores/main/schema/delta/47/last_access_media.sql
diff --git a/synapse/storage/schema/delta/47/postgres_fts_gin.sql b/synapse/storage/data_stores/main/schema/delta/47/postgres_fts_gin.sql
index 31d7a817eb..31d7a817eb 100644
--- a/synapse/storage/schema/delta/47/postgres_fts_gin.sql
+++ b/synapse/storage/data_stores/main/schema/delta/47/postgres_fts_gin.sql
diff --git a/synapse/storage/schema/delta/47/push_actions_staging.sql b/synapse/storage/data_stores/main/schema/delta/47/push_actions_staging.sql
index edccf4a96f..edccf4a96f 100644
--- a/synapse/storage/schema/delta/47/push_actions_staging.sql
+++ b/synapse/storage/data_stores/main/schema/delta/47/push_actions_staging.sql
diff --git a/synapse/storage/schema/delta/47/state_group_seq.py b/synapse/storage/data_stores/main/schema/delta/47/state_group_seq.py
index 9fd1ccf6f7..9fd1ccf6f7 100644
--- a/synapse/storage/schema/delta/47/state_group_seq.py
+++ b/synapse/storage/data_stores/main/schema/delta/47/state_group_seq.py
diff --git a/synapse/storage/schema/delta/48/add_user_consent.sql b/synapse/storage/data_stores/main/schema/delta/48/add_user_consent.sql
index 5237491506..5237491506 100644
--- a/synapse/storage/schema/delta/48/add_user_consent.sql
+++ b/synapse/storage/data_stores/main/schema/delta/48/add_user_consent.sql
diff --git a/synapse/storage/schema/delta/48/add_user_ips_last_seen_index.sql b/synapse/storage/data_stores/main/schema/delta/48/add_user_ips_last_seen_index.sql
index 9248b0b24a..9248b0b24a 100644
--- a/synapse/storage/schema/delta/48/add_user_ips_last_seen_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/48/add_user_ips_last_seen_index.sql
diff --git a/synapse/storage/schema/delta/48/deactivated_users.sql b/synapse/storage/data_stores/main/schema/delta/48/deactivated_users.sql
index e9013a6969..e9013a6969 100644
--- a/synapse/storage/schema/delta/48/deactivated_users.sql
+++ b/synapse/storage/data_stores/main/schema/delta/48/deactivated_users.sql
diff --git a/synapse/storage/schema/delta/48/group_unique_indexes.py b/synapse/storage/data_stores/main/schema/delta/48/group_unique_indexes.py
index 49f5f2c003..49f5f2c003 100644
--- a/synapse/storage/schema/delta/48/group_unique_indexes.py
+++ b/synapse/storage/data_stores/main/schema/delta/48/group_unique_indexes.py
diff --git a/synapse/storage/schema/delta/48/groups_joinable.sql b/synapse/storage/data_stores/main/schema/delta/48/groups_joinable.sql
index ce26eaf0c9..ce26eaf0c9 100644
--- a/synapse/storage/schema/delta/48/groups_joinable.sql
+++ b/synapse/storage/data_stores/main/schema/delta/48/groups_joinable.sql
diff --git a/synapse/storage/schema/delta/49/add_user_consent_server_notice_sent.sql b/synapse/storage/data_stores/main/schema/delta/49/add_user_consent_server_notice_sent.sql
index 14dcf18d73..14dcf18d73 100644
--- a/synapse/storage/schema/delta/49/add_user_consent_server_notice_sent.sql
+++ b/synapse/storage/data_stores/main/schema/delta/49/add_user_consent_server_notice_sent.sql
diff --git a/synapse/storage/schema/delta/49/add_user_daily_visits.sql b/synapse/storage/data_stores/main/schema/delta/49/add_user_daily_visits.sql
index 3dd478196f..3dd478196f 100644
--- a/synapse/storage/schema/delta/49/add_user_daily_visits.sql
+++ b/synapse/storage/data_stores/main/schema/delta/49/add_user_daily_visits.sql
diff --git a/synapse/storage/schema/delta/49/add_user_ips_last_seen_only_index.sql b/synapse/storage/data_stores/main/schema/delta/49/add_user_ips_last_seen_only_index.sql
index 3a4ed59b5b..3a4ed59b5b 100644
--- a/synapse/storage/schema/delta/49/add_user_ips_last_seen_only_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/49/add_user_ips_last_seen_only_index.sql
diff --git a/synapse/storage/schema/delta/50/add_creation_ts_users_index.sql b/synapse/storage/data_stores/main/schema/delta/50/add_creation_ts_users_index.sql
index c93ae47532..c93ae47532 100644
--- a/synapse/storage/schema/delta/50/add_creation_ts_users_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/50/add_creation_ts_users_index.sql
diff --git a/synapse/storage/schema/delta/50/erasure_store.sql b/synapse/storage/data_stores/main/schema/delta/50/erasure_store.sql
index 5d8641a9ab..5d8641a9ab 100644
--- a/synapse/storage/schema/delta/50/erasure_store.sql
+++ b/synapse/storage/data_stores/main/schema/delta/50/erasure_store.sql
diff --git a/synapse/storage/schema/delta/50/make_event_content_nullable.py b/synapse/storage/data_stores/main/schema/delta/50/make_event_content_nullable.py
index b1684a8441..b1684a8441 100644
--- a/synapse/storage/schema/delta/50/make_event_content_nullable.py
+++ b/synapse/storage/data_stores/main/schema/delta/50/make_event_content_nullable.py
diff --git a/synapse/storage/schema/delta/51/e2e_room_keys.sql b/synapse/storage/data_stores/main/schema/delta/51/e2e_room_keys.sql
index c0e66a697d..c0e66a697d 100644
--- a/synapse/storage/schema/delta/51/e2e_room_keys.sql
+++ b/synapse/storage/data_stores/main/schema/delta/51/e2e_room_keys.sql
diff --git a/synapse/storage/schema/delta/51/monthly_active_users.sql b/synapse/storage/data_stores/main/schema/delta/51/monthly_active_users.sql
index c9d537d5a3..c9d537d5a3 100644
--- a/synapse/storage/schema/delta/51/monthly_active_users.sql
+++ b/synapse/storage/data_stores/main/schema/delta/51/monthly_active_users.sql
diff --git a/synapse/storage/schema/delta/52/add_event_to_state_group_index.sql b/synapse/storage/data_stores/main/schema/delta/52/add_event_to_state_group_index.sql
index 91e03d13e1..91e03d13e1 100644
--- a/synapse/storage/schema/delta/52/add_event_to_state_group_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/52/add_event_to_state_group_index.sql
diff --git a/synapse/storage/schema/delta/52/device_list_streams_unique_idx.sql b/synapse/storage/data_stores/main/schema/delta/52/device_list_streams_unique_idx.sql
index bfa49e6f92..bfa49e6f92 100644
--- a/synapse/storage/schema/delta/52/device_list_streams_unique_idx.sql
+++ b/synapse/storage/data_stores/main/schema/delta/52/device_list_streams_unique_idx.sql
diff --git a/synapse/storage/schema/delta/52/e2e_room_keys.sql b/synapse/storage/data_stores/main/schema/delta/52/e2e_room_keys.sql
index db687cccae..db687cccae 100644
--- a/synapse/storage/schema/delta/52/e2e_room_keys.sql
+++ b/synapse/storage/data_stores/main/schema/delta/52/e2e_room_keys.sql
diff --git a/synapse/storage/schema/delta/53/add_user_type_to_users.sql b/synapse/storage/data_stores/main/schema/delta/53/add_user_type_to_users.sql
index 88ec2f83e5..88ec2f83e5 100644
--- a/synapse/storage/schema/delta/53/add_user_type_to_users.sql
+++ b/synapse/storage/data_stores/main/schema/delta/53/add_user_type_to_users.sql
diff --git a/synapse/storage/schema/delta/53/drop_sent_transactions.sql b/synapse/storage/data_stores/main/schema/delta/53/drop_sent_transactions.sql
index e372f5a44a..e372f5a44a 100644
--- a/synapse/storage/schema/delta/53/drop_sent_transactions.sql
+++ b/synapse/storage/data_stores/main/schema/delta/53/drop_sent_transactions.sql
diff --git a/synapse/storage/schema/delta/53/event_format_version.sql b/synapse/storage/data_stores/main/schema/delta/53/event_format_version.sql
index 1d977c2834..1d977c2834 100644
--- a/synapse/storage/schema/delta/53/event_format_version.sql
+++ b/synapse/storage/data_stores/main/schema/delta/53/event_format_version.sql
diff --git a/synapse/storage/schema/delta/53/user_dir_populate.sql b/synapse/storage/data_stores/main/schema/delta/53/user_dir_populate.sql
index ffcc896b58..ffcc896b58 100644
--- a/synapse/storage/schema/delta/53/user_dir_populate.sql
+++ b/synapse/storage/data_stores/main/schema/delta/53/user_dir_populate.sql
diff --git a/synapse/storage/schema/delta/53/user_ips_index.sql b/synapse/storage/data_stores/main/schema/delta/53/user_ips_index.sql
index b812c5794f..b812c5794f 100644
--- a/synapse/storage/schema/delta/53/user_ips_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/53/user_ips_index.sql
diff --git a/synapse/storage/schema/delta/53/user_share.sql b/synapse/storage/data_stores/main/schema/delta/53/user_share.sql
index 5831b1a6f8..5831b1a6f8 100644
--- a/synapse/storage/schema/delta/53/user_share.sql
+++ b/synapse/storage/data_stores/main/schema/delta/53/user_share.sql
diff --git a/synapse/storage/schema/delta/53/user_threepid_id.sql b/synapse/storage/data_stores/main/schema/delta/53/user_threepid_id.sql
index 80c2c573b6..80c2c573b6 100644
--- a/synapse/storage/schema/delta/53/user_threepid_id.sql
+++ b/synapse/storage/data_stores/main/schema/delta/53/user_threepid_id.sql
diff --git a/synapse/storage/schema/delta/53/users_in_public_rooms.sql b/synapse/storage/data_stores/main/schema/delta/53/users_in_public_rooms.sql
index f7827ca6d2..f7827ca6d2 100644
--- a/synapse/storage/schema/delta/53/users_in_public_rooms.sql
+++ b/synapse/storage/data_stores/main/schema/delta/53/users_in_public_rooms.sql
diff --git a/synapse/storage/schema/delta/54/account_validity_with_renewal.sql b/synapse/storage/data_stores/main/schema/delta/54/account_validity_with_renewal.sql
index 0adb2ad55e..0adb2ad55e 100644
--- a/synapse/storage/schema/delta/54/account_validity_with_renewal.sql
+++ b/synapse/storage/data_stores/main/schema/delta/54/account_validity_with_renewal.sql
diff --git a/synapse/storage/schema/delta/54/add_validity_to_server_keys.sql b/synapse/storage/data_stores/main/schema/delta/54/add_validity_to_server_keys.sql
index c01aa9d2d9..c01aa9d2d9 100644
--- a/synapse/storage/schema/delta/54/add_validity_to_server_keys.sql
+++ b/synapse/storage/data_stores/main/schema/delta/54/add_validity_to_server_keys.sql
diff --git a/synapse/storage/schema/delta/54/delete_forward_extremities.sql b/synapse/storage/data_stores/main/schema/delta/54/delete_forward_extremities.sql
index b062ec840c..b062ec840c 100644
--- a/synapse/storage/schema/delta/54/delete_forward_extremities.sql
+++ b/synapse/storage/data_stores/main/schema/delta/54/delete_forward_extremities.sql
diff --git a/synapse/storage/schema/delta/54/drop_legacy_tables.sql b/synapse/storage/data_stores/main/schema/delta/54/drop_legacy_tables.sql
index dbbe682697..dbbe682697 100644
--- a/synapse/storage/schema/delta/54/drop_legacy_tables.sql
+++ b/synapse/storage/data_stores/main/schema/delta/54/drop_legacy_tables.sql
diff --git a/synapse/storage/schema/delta/54/drop_presence_list.sql b/synapse/storage/data_stores/main/schema/delta/54/drop_presence_list.sql
index e6ee70c623..e6ee70c623 100644
--- a/synapse/storage/schema/delta/54/drop_presence_list.sql
+++ b/synapse/storage/data_stores/main/schema/delta/54/drop_presence_list.sql
diff --git a/synapse/storage/schema/delta/54/relations.sql b/synapse/storage/data_stores/main/schema/delta/54/relations.sql
index 134862b870..134862b870 100644
--- a/synapse/storage/schema/delta/54/relations.sql
+++ b/synapse/storage/data_stores/main/schema/delta/54/relations.sql
diff --git a/synapse/storage/schema/delta/54/stats.sql b/synapse/storage/data_stores/main/schema/delta/54/stats.sql
index 652e58308e..652e58308e 100644
--- a/synapse/storage/schema/delta/54/stats.sql
+++ b/synapse/storage/data_stores/main/schema/delta/54/stats.sql
diff --git a/synapse/storage/schema/delta/54/stats2.sql b/synapse/storage/data_stores/main/schema/delta/54/stats2.sql
index 3b2d48447f..3b2d48447f 100644
--- a/synapse/storage/schema/delta/54/stats2.sql
+++ b/synapse/storage/data_stores/main/schema/delta/54/stats2.sql
diff --git a/synapse/storage/schema/delta/55/access_token_expiry.sql b/synapse/storage/data_stores/main/schema/delta/55/access_token_expiry.sql
index 4590604bfd..4590604bfd 100644
--- a/synapse/storage/schema/delta/55/access_token_expiry.sql
+++ b/synapse/storage/data_stores/main/schema/delta/55/access_token_expiry.sql
diff --git a/synapse/storage/schema/delta/55/track_threepid_validations.sql b/synapse/storage/data_stores/main/schema/delta/55/track_threepid_validations.sql
index a8eced2e0a..a8eced2e0a 100644
--- a/synapse/storage/schema/delta/55/track_threepid_validations.sql
+++ b/synapse/storage/data_stores/main/schema/delta/55/track_threepid_validations.sql
diff --git a/synapse/storage/schema/delta/55/users_alter_deactivated.sql b/synapse/storage/data_stores/main/schema/delta/55/users_alter_deactivated.sql
index dabdde489b..dabdde489b 100644
--- a/synapse/storage/schema/delta/55/users_alter_deactivated.sql
+++ b/synapse/storage/data_stores/main/schema/delta/55/users_alter_deactivated.sql
diff --git a/synapse/storage/schema/delta/56/add_spans_to_device_lists.sql b/synapse/storage/data_stores/main/schema/delta/56/add_spans_to_device_lists.sql
index 41807eb1e7..41807eb1e7 100644
--- a/synapse/storage/schema/delta/56/add_spans_to_device_lists.sql
+++ b/synapse/storage/data_stores/main/schema/delta/56/add_spans_to_device_lists.sql
diff --git a/synapse/storage/schema/delta/56/current_state_events_membership.sql b/synapse/storage/data_stores/main/schema/delta/56/current_state_events_membership.sql
index 473018676f..473018676f 100644
--- a/synapse/storage/schema/delta/56/current_state_events_membership.sql
+++ b/synapse/storage/data_stores/main/schema/delta/56/current_state_events_membership.sql
diff --git a/synapse/storage/schema/delta/56/current_state_events_membership_mk2.sql b/synapse/storage/data_stores/main/schema/delta/56/current_state_events_membership_mk2.sql
index 3133d42d4a..3133d42d4a 100644
--- a/synapse/storage/schema/delta/56/current_state_events_membership_mk2.sql
+++ b/synapse/storage/data_stores/main/schema/delta/56/current_state_events_membership_mk2.sql
diff --git a/synapse/storage/schema/delta/56/destinations_failure_ts.sql b/synapse/storage/data_stores/main/schema/delta/56/destinations_failure_ts.sql
index f00889290b..f00889290b 100644
--- a/synapse/storage/schema/delta/56/destinations_failure_ts.sql
+++ b/synapse/storage/data_stores/main/schema/delta/56/destinations_failure_ts.sql
diff --git a/synapse/storage/schema/delta/56/destinations_retry_interval_type.sql.postgres b/synapse/storage/data_stores/main/schema/delta/56/destinations_retry_interval_type.sql.postgres
index b9bbb18a91..b9bbb18a91 100644
--- a/synapse/storage/schema/delta/56/destinations_retry_interval_type.sql.postgres
+++ b/synapse/storage/data_stores/main/schema/delta/56/destinations_retry_interval_type.sql.postgres
diff --git a/synapse/storage/schema/delta/56/devices_last_seen.sql b/synapse/storage/data_stores/main/schema/delta/56/devices_last_seen.sql
index dfa902d0ba..dfa902d0ba 100644
--- a/synapse/storage/schema/delta/56/devices_last_seen.sql
+++ b/synapse/storage/data_stores/main/schema/delta/56/devices_last_seen.sql
diff --git a/synapse/storage/schema/delta/56/drop_unused_event_tables.sql b/synapse/storage/data_stores/main/schema/delta/56/drop_unused_event_tables.sql
index 9f09922c67..9f09922c67 100644
--- a/synapse/storage/schema/delta/56/drop_unused_event_tables.sql
+++ b/synapse/storage/data_stores/main/schema/delta/56/drop_unused_event_tables.sql
diff --git a/synapse/storage/schema/delta/56/fix_room_keys_index.sql b/synapse/storage/data_stores/main/schema/delta/56/fix_room_keys_index.sql
index 014cb3b538..014cb3b538 100644
--- a/synapse/storage/schema/delta/56/fix_room_keys_index.sql
+++ b/synapse/storage/data_stores/main/schema/delta/56/fix_room_keys_index.sql
diff --git a/synapse/storage/schema/delta/56/public_room_list_idx.sql b/synapse/storage/data_stores/main/schema/delta/56/public_room_list_idx.sql
index 7be31ffebb..7be31ffebb 100644
--- a/synapse/storage/schema/delta/56/public_room_list_idx.sql
+++ b/synapse/storage/data_stores/main/schema/delta/56/public_room_list_idx.sql
diff --git a/synapse/storage/schema/delta/56/redaction_censor.sql b/synapse/storage/data_stores/main/schema/delta/56/redaction_censor.sql
index fe51b02309..fe51b02309 100644
--- a/synapse/storage/schema/delta/56/redaction_censor.sql
+++ b/synapse/storage/data_stores/main/schema/delta/56/redaction_censor.sql
diff --git a/synapse/storage/schema/delta/56/redaction_censor2.sql b/synapse/storage/data_stores/main/schema/delta/56/redaction_censor2.sql
index 77a5eca499..77a5eca499 100644
--- a/synapse/storage/schema/delta/56/redaction_censor2.sql
+++ b/synapse/storage/data_stores/main/schema/delta/56/redaction_censor2.sql
diff --git a/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres b/synapse/storage/data_stores/main/schema/delta/56/redaction_censor3_fix_update.sql.postgres
index 67471f3ef5..67471f3ef5 100644
--- a/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres
+++ b/synapse/storage/data_stores/main/schema/delta/56/redaction_censor3_fix_update.sql.postgres
diff --git a/synapse/storage/schema/delta/56/room_membership_idx.sql b/synapse/storage/data_stores/main/schema/delta/56/room_membership_idx.sql
index 92ab1f5e65..92ab1f5e65 100644
--- a/synapse/storage/schema/delta/56/room_membership_idx.sql
+++ b/synapse/storage/data_stores/main/schema/delta/56/room_membership_idx.sql
diff --git a/synapse/storage/schema/delta/56/stats_separated.sql b/synapse/storage/data_stores/main/schema/delta/56/stats_separated.sql
index 163529c071..163529c071 100644
--- a/synapse/storage/schema/delta/56/stats_separated.sql
+++ b/synapse/storage/data_stores/main/schema/delta/56/stats_separated.sql
diff --git a/synapse/storage/schema/delta/56/unique_user_filter_index.py b/synapse/storage/data_stores/main/schema/delta/56/unique_user_filter_index.py
index 1de8b54961..1de8b54961 100644
--- a/synapse/storage/schema/delta/56/unique_user_filter_index.py
+++ b/synapse/storage/data_stores/main/schema/delta/56/unique_user_filter_index.py
diff --git a/synapse/storage/schema/delta/56/user_external_ids.sql b/synapse/storage/data_stores/main/schema/delta/56/user_external_ids.sql
index 91390c4527..91390c4527 100644
--- a/synapse/storage/schema/delta/56/user_external_ids.sql
+++ b/synapse/storage/data_stores/main/schema/delta/56/user_external_ids.sql
diff --git a/synapse/storage/schema/delta/56/users_in_public_rooms_idx.sql b/synapse/storage/data_stores/main/schema/delta/56/users_in_public_rooms_idx.sql
index 149f8be8b6..149f8be8b6 100644
--- a/synapse/storage/schema/delta/56/users_in_public_rooms_idx.sql
+++ b/synapse/storage/data_stores/main/schema/delta/56/users_in_public_rooms_idx.sql
diff --git a/synapse/storage/schema/full_schemas/16/application_services.sql b/synapse/storage/data_stores/main/schema/full_schemas/16/application_services.sql
index 883fcd10b2..883fcd10b2 100644
--- a/synapse/storage/schema/full_schemas/16/application_services.sql
+++ b/synapse/storage/data_stores/main/schema/full_schemas/16/application_services.sql
diff --git a/synapse/storage/schema/full_schemas/16/event_edges.sql b/synapse/storage/data_stores/main/schema/full_schemas/16/event_edges.sql
index 10ce2aa7a0..10ce2aa7a0 100644
--- a/synapse/storage/schema/full_schemas/16/event_edges.sql
+++ b/synapse/storage/data_stores/main/schema/full_schemas/16/event_edges.sql
diff --git a/synapse/storage/schema/full_schemas/16/event_signatures.sql b/synapse/storage/data_stores/main/schema/full_schemas/16/event_signatures.sql
index 95826da431..95826da431 100644
--- a/synapse/storage/schema/full_schemas/16/event_signatures.sql
+++ b/synapse/storage/data_stores/main/schema/full_schemas/16/event_signatures.sql
diff --git a/synapse/storage/schema/full_schemas/16/im.sql b/synapse/storage/data_stores/main/schema/full_schemas/16/im.sql
index a1a2aa8e5b..a1a2aa8e5b 100644
--- a/synapse/storage/schema/full_schemas/16/im.sql
+++ b/synapse/storage/data_stores/main/schema/full_schemas/16/im.sql
diff --git a/synapse/storage/schema/full_schemas/16/keys.sql b/synapse/storage/data_stores/main/schema/full_schemas/16/keys.sql
index 11cdffdbb3..11cdffdbb3 100644
--- a/synapse/storage/schema/full_schemas/16/keys.sql
+++ b/synapse/storage/data_stores/main/schema/full_schemas/16/keys.sql
diff --git a/synapse/storage/schema/full_schemas/16/media_repository.sql b/synapse/storage/data_stores/main/schema/full_schemas/16/media_repository.sql
index 8f3759bb2a..8f3759bb2a 100644
--- a/synapse/storage/schema/full_schemas/16/media_repository.sql
+++ b/synapse/storage/data_stores/main/schema/full_schemas/16/media_repository.sql
diff --git a/synapse/storage/schema/full_schemas/16/presence.sql b/synapse/storage/data_stores/main/schema/full_schemas/16/presence.sql
index 01d2d8f833..01d2d8f833 100644
--- a/synapse/storage/schema/full_schemas/16/presence.sql
+++ b/synapse/storage/data_stores/main/schema/full_schemas/16/presence.sql
diff --git a/synapse/storage/schema/full_schemas/16/profiles.sql b/synapse/storage/data_stores/main/schema/full_schemas/16/profiles.sql
index c04f4747d9..c04f4747d9 100644
--- a/synapse/storage/schema/full_schemas/16/profiles.sql
+++ b/synapse/storage/data_stores/main/schema/full_schemas/16/profiles.sql
diff --git a/synapse/storage/schema/full_schemas/16/push.sql b/synapse/storage/data_stores/main/schema/full_schemas/16/push.sql
index e44465cf45..e44465cf45 100644
--- a/synapse/storage/schema/full_schemas/16/push.sql
+++ b/synapse/storage/data_stores/main/schema/full_schemas/16/push.sql
diff --git a/synapse/storage/schema/full_schemas/16/redactions.sql b/synapse/storage/data_stores/main/schema/full_schemas/16/redactions.sql
index 318f0d9aa5..318f0d9aa5 100644
--- a/synapse/storage/schema/full_schemas/16/redactions.sql
+++ b/synapse/storage/data_stores/main/schema/full_schemas/16/redactions.sql
diff --git a/synapse/storage/schema/full_schemas/16/room_aliases.sql b/synapse/storage/data_stores/main/schema/full_schemas/16/room_aliases.sql
index d47da3b12f..d47da3b12f 100644
--- a/synapse/storage/schema/full_schemas/16/room_aliases.sql
+++ b/synapse/storage/data_stores/main/schema/full_schemas/16/room_aliases.sql
diff --git a/synapse/storage/schema/full_schemas/16/state.sql b/synapse/storage/data_stores/main/schema/full_schemas/16/state.sql
index 96391a8f0e..96391a8f0e 100644
--- a/synapse/storage/schema/full_schemas/16/state.sql
+++ b/synapse/storage/data_stores/main/schema/full_schemas/16/state.sql
diff --git a/synapse/storage/schema/full_schemas/16/transactions.sql b/synapse/storage/data_stores/main/schema/full_schemas/16/transactions.sql
index 17e67bedac..17e67bedac 100644
--- a/synapse/storage/schema/full_schemas/16/transactions.sql
+++ b/synapse/storage/data_stores/main/schema/full_schemas/16/transactions.sql
diff --git a/synapse/storage/schema/full_schemas/16/users.sql b/synapse/storage/data_stores/main/schema/full_schemas/16/users.sql
index f013aa8b18..f013aa8b18 100644
--- a/synapse/storage/schema/full_schemas/16/users.sql
+++ b/synapse/storage/data_stores/main/schema/full_schemas/16/users.sql
diff --git a/synapse/storage/schema/full_schemas/54/full.sql.postgres b/synapse/storage/data_stores/main/schema/full_schemas/54/full.sql.postgres
index 098434356f..4ad2929f32 100644
--- a/synapse/storage/schema/full_schemas/54/full.sql.postgres
+++ b/synapse/storage/data_stores/main/schema/full_schemas/54/full.sql.postgres
@@ -70,15 +70,6 @@ CREATE TABLE appservice_stream_position (
);
-
-CREATE TABLE background_updates (
- update_name text NOT NULL,
- progress_json text NOT NULL,
- depends_on text
-);
-
-
-
CREATE TABLE blocked_rooms (
room_id text NOT NULL,
user_id text NOT NULL
@@ -1202,11 +1193,6 @@ ALTER TABLE ONLY appservice_stream_position
-ALTER TABLE ONLY background_updates
- ADD CONSTRAINT background_updates_uniqueness UNIQUE (update_name);
-
-
-
ALTER TABLE ONLY current_state_events
ADD CONSTRAINT current_state_events_event_id_key UNIQUE (event_id);
@@ -2047,6 +2033,3 @@ CREATE INDEX users_who_share_private_rooms_r_idx ON users_who_share_private_room
CREATE UNIQUE INDEX users_who_share_private_rooms_u_idx ON users_who_share_private_rooms USING btree (user_id, other_user_id, room_id);
-
-
-
diff --git a/synapse/storage/schema/full_schemas/54/full.sql.sqlite b/synapse/storage/data_stores/main/schema/full_schemas/54/full.sql.sqlite
index be9295e4c9..bad33291e7 100644
--- a/synapse/storage/schema/full_schemas/54/full.sql.sqlite
+++ b/synapse/storage/data_stores/main/schema/full_schemas/54/full.sql.sqlite
@@ -67,7 +67,6 @@ CREATE INDEX receipts_linearized_id ON receipts_linearized( stream_id );
CREATE INDEX receipts_linearized_room_stream ON receipts_linearized( room_id, stream_id );
CREATE TABLE IF NOT EXISTS "user_threepids" ( user_id TEXT NOT NULL, medium TEXT NOT NULL, address TEXT NOT NULL, validated_at BIGINT NOT NULL, added_at BIGINT NOT NULL, CONSTRAINT medium_address UNIQUE (medium, address) );
CREATE INDEX user_threepids_user_id ON user_threepids(user_id);
-CREATE TABLE background_updates( update_name TEXT NOT NULL, progress_json TEXT NOT NULL, depends_on TEXT, CONSTRAINT background_updates_uniqueness UNIQUE (update_name) );
CREATE VIRTUAL TABLE event_search USING fts4 ( event_id, room_id, sender, key, value )
/* event_search(event_id,room_id,sender,"key",value) */;
CREATE TABLE IF NOT EXISTS 'event_search_content'(docid INTEGER PRIMARY KEY, 'c0event_id', 'c1room_id', 'c2sender', 'c3key', 'c4value');
diff --git a/synapse/storage/schema/full_schemas/54/stream_positions.sql b/synapse/storage/data_stores/main/schema/full_schemas/54/stream_positions.sql
index c265fd20e2..c265fd20e2 100644
--- a/synapse/storage/schema/full_schemas/54/stream_positions.sql
+++ b/synapse/storage/data_stores/main/schema/full_schemas/54/stream_positions.sql
diff --git a/synapse/storage/schema/full_schemas/README.txt b/synapse/storage/data_stores/main/schema/full_schemas/README.txt
index d3f6401344..d3f6401344 100644
--- a/synapse/storage/schema/full_schemas/README.txt
+++ b/synapse/storage/data_stores/main/schema/full_schemas/README.txt
diff --git a/synapse/storage/search.py b/synapse/storage/data_stores/main/search.py
index 7695bf09fc..0e08497452 100644
--- a/synapse/storage/search.py
+++ b/synapse/storage/data_stores/main/search.py
@@ -25,10 +25,9 @@ from twisted.internet import defer
from synapse.api.errors import SynapseError
from synapse.storage._base import make_in_list_sql_clause
+from synapse.storage.background_updates import BackgroundUpdateStore
from synapse.storage.engines import PostgresEngine, Sqlite3Engine
-from .background_updates import BackgroundUpdateStore
-
logger = logging.getLogger(__name__)
SearchEntry = namedtuple(
diff --git a/synapse/storage/signatures.py b/synapse/storage/data_stores/main/signatures.py
index fb83218f90..556191b76f 100644
--- a/synapse/storage/signatures.py
+++ b/synapse/storage/data_stores/main/signatures.py
@@ -20,10 +20,9 @@ from unpaddedbase64 import encode_base64
from twisted.internet import defer
from synapse.crypto.event_signing import compute_event_reference_hash
+from synapse.storage._base import SQLBaseStore
from synapse.util.caches.descriptors import cached, cachedList
-from ._base import SQLBaseStore
-
# py2 sqlite has buffer hardcoded as only binary type, so we must use it,
# despite being deprecated and removed in favor of memoryview
if six.PY2:
diff --git a/synapse/storage/data_stores/main/state.py b/synapse/storage/data_stores/main/state.py
new file mode 100644
index 0000000000..d54442e5fa
--- /dev/null
+++ b/synapse/storage/data_stores/main/state.py
@@ -0,0 +1,1244 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from collections import namedtuple
+
+from six import iteritems, itervalues
+from six.moves import range
+
+from twisted.internet import defer
+
+from synapse.api.constants import EventTypes
+from synapse.api.errors import NotFoundError
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.background_updates import BackgroundUpdateStore
+from synapse.storage.data_stores.main.events_worker import EventsWorkerStore
+from synapse.storage.engines import PostgresEngine
+from synapse.storage.state import StateFilter
+from synapse.util.caches import get_cache_factor_for, intern_string
+from synapse.util.caches.descriptors import cached, cachedList
+from synapse.util.caches.dictionary_cache import DictionaryCache
+from synapse.util.stringutils import to_ascii
+
+logger = logging.getLogger(__name__)
+
+
+MAX_STATE_DELTA_HOPS = 100
+
+
+class _GetStateGroupDelta(
+ namedtuple("_GetStateGroupDelta", ("prev_group", "delta_ids"))
+):
+ """Return type of get_state_group_delta that implements __len__, which lets
+ us use the itrable flag when caching
+ """
+
+ __slots__ = []
+
+ def __len__(self):
+ return len(self.delta_ids) if self.delta_ids else 0
+
+
+class StateGroupBackgroundUpdateStore(SQLBaseStore):
+ """Defines functions related to state groups needed to run the state backgroud
+ updates.
+ """
+
+ def _count_state_group_hops_txn(self, txn, state_group):
+ """Given a state group, count how many hops there are in the tree.
+
+ This is used to ensure the delta chains don't get too long.
+ """
+ if isinstance(self.database_engine, PostgresEngine):
+ sql = """
+ WITH RECURSIVE state(state_group) AS (
+ VALUES(?::bigint)
+ UNION ALL
+ SELECT prev_state_group FROM state_group_edges e, state s
+ WHERE s.state_group = e.state_group
+ )
+ SELECT count(*) FROM state;
+ """
+
+ txn.execute(sql, (state_group,))
+ row = txn.fetchone()
+ if row and row[0]:
+ return row[0]
+ else:
+ return 0
+ else:
+ # We don't use WITH RECURSIVE on sqlite3 as there are distributions
+ # that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
+ next_group = state_group
+ count = 0
+
+ while next_group:
+ next_group = self._simple_select_one_onecol_txn(
+ txn,
+ table="state_group_edges",
+ keyvalues={"state_group": next_group},
+ retcol="prev_state_group",
+ allow_none=True,
+ )
+ if next_group:
+ count += 1
+
+ return count
+
+ def _get_state_groups_from_groups_txn(
+ self, txn, groups, state_filter=StateFilter.all()
+ ):
+ results = {group: {} for group in groups}
+
+ where_clause, where_args = state_filter.make_sql_filter_clause()
+
+ # Unless the filter clause is empty, we're going to append it after an
+ # existing where clause
+ if where_clause:
+ where_clause = " AND (%s)" % (where_clause,)
+
+ if isinstance(self.database_engine, PostgresEngine):
+ # Temporarily disable sequential scans in this transaction. This is
+ # a temporary hack until we can add the right indices in
+ txn.execute("SET LOCAL enable_seqscan=off")
+
+ # The below query walks the state_group tree so that the "state"
+ # table includes all state_groups in the tree. It then joins
+ # against `state_groups_state` to fetch the latest state.
+ # It assumes that previous state groups are always numerically
+ # lesser.
+ # The PARTITION is used to get the event_id in the greatest state
+ # group for the given type, state_key.
+ # This may return multiple rows per (type, state_key), but last_value
+ # should be the same.
+ sql = """
+ WITH RECURSIVE state(state_group) AS (
+ VALUES(?::bigint)
+ UNION ALL
+ SELECT prev_state_group FROM state_group_edges e, state s
+ WHERE s.state_group = e.state_group
+ )
+ SELECT DISTINCT type, state_key, last_value(event_id) OVER (
+ PARTITION BY type, state_key ORDER BY state_group ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ ) AS event_id FROM state_groups_state
+ WHERE state_group IN (
+ SELECT state_group FROM state
+ )
+ """
+
+ for group in groups:
+ args = [group]
+ args.extend(where_args)
+
+ txn.execute(sql + where_clause, args)
+ for row in txn:
+ typ, state_key, event_id = row
+ key = (typ, state_key)
+ results[group][key] = event_id
+ else:
+ max_entries_returned = state_filter.max_entries_returned()
+
+ # We don't use WITH RECURSIVE on sqlite3 as there are distributions
+ # that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
+ for group in groups:
+ next_group = group
+
+ while next_group:
+ # We did this before by getting the list of group ids, and
+ # then passing that list to sqlite to get latest event for
+ # each (type, state_key). However, that was terribly slow
+ # without the right indices (which we can't add until
+ # after we finish deduping state, which requires this func)
+ args = [next_group]
+ args.extend(where_args)
+
+ txn.execute(
+ "SELECT type, state_key, event_id FROM state_groups_state"
+ " WHERE state_group = ? " + where_clause,
+ args,
+ )
+ results[group].update(
+ ((typ, state_key), event_id)
+ for typ, state_key, event_id in txn
+ if (typ, state_key) not in results[group]
+ )
+
+ # If the number of entries in the (type,state_key)->event_id dict
+ # matches the number of (type,state_keys) types we were searching
+ # for, then we must have found them all, so no need to go walk
+ # further down the tree... UNLESS our types filter contained
+ # wildcards (i.e. Nones) in which case we have to do an exhaustive
+ # search
+ if (
+ max_entries_returned is not None
+ and len(results[group]) == max_entries_returned
+ ):
+ break
+
+ next_group = self._simple_select_one_onecol_txn(
+ txn,
+ table="state_group_edges",
+ keyvalues={"state_group": next_group},
+ retcol="prev_state_group",
+ allow_none=True,
+ )
+
+ return results
+
+
+# this inherits from EventsWorkerStore because it calls self.get_events
+class StateGroupWorkerStore(
+ EventsWorkerStore, StateGroupBackgroundUpdateStore, SQLBaseStore
+):
+ """The parts of StateGroupStore that can be called from workers.
+ """
+
+ STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication"
+ STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index"
+ CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx"
+
+ def __init__(self, db_conn, hs):
+ super(StateGroupWorkerStore, self).__init__(db_conn, hs)
+
+ # Originally the state store used a single DictionaryCache to cache the
+ # event IDs for the state types in a given state group to avoid hammering
+ # on the state_group* tables.
+ #
+ # The point of using a DictionaryCache is that it can cache a subset
+ # of the state events for a given state group (i.e. a subset of the keys for a
+ # given dict which is an entry in the cache for a given state group ID).
+ #
+ # However, this poses problems when performing complicated queries
+ # on the store - for instance: "give me all the state for this group, but
+ # limit members to this subset of users", as DictionaryCache's API isn't
+ # rich enough to say "please cache any of these fields, apart from this subset".
+ # This is problematic when lazy loading members, which requires this behaviour,
+ # as without it the cache has no choice but to speculatively load all
+ # state events for the group, which negates the efficiency being sought.
+ #
+ # Rather than overcomplicating DictionaryCache's API, we instead split the
+ # state_group_cache into two halves - one for tracking non-member events,
+ # and the other for tracking member_events. This means that lazy loading
+ # queries can be made in a cache-friendly manner by querying both caches
+ # separately and then merging the result. So for the example above, you
+ # would query the members cache for a specific subset of state keys
+ # (which DictionaryCache will handle efficiently and fine) and the non-members
+ # cache for all state (which DictionaryCache will similarly handle fine)
+ # and then just merge the results together.
+ #
+ # We size the non-members cache to be smaller than the members cache as the
+ # vast majority of state in Matrix (today) is member events.
+
+ self._state_group_cache = DictionaryCache(
+ "*stateGroupCache*",
+ # TODO: this hasn't been tuned yet
+ 50000 * get_cache_factor_for("stateGroupCache"),
+ )
+ self._state_group_members_cache = DictionaryCache(
+ "*stateGroupMembersCache*",
+ 500000 * get_cache_factor_for("stateGroupMembersCache"),
+ )
+
+ @defer.inlineCallbacks
+ def get_room_version(self, room_id):
+ """Get the room_version of a given room
+
+ Args:
+ room_id (str)
+
+ Returns:
+ Deferred[str]
+
+ Raises:
+ NotFoundError if the room is unknown
+ """
+ # for now we do this by looking at the create event. We may want to cache this
+ # more intelligently in future.
+
+ # Retrieve the room's create event
+ create_event = yield self.get_create_event_for_room(room_id)
+ return create_event.content.get("room_version", "1")
+
+ @defer.inlineCallbacks
+ def get_room_predecessor(self, room_id):
+ """Get the predecessor room of an upgraded room if one exists.
+ Otherwise return None.
+
+ Args:
+ room_id (str)
+
+ Returns:
+ Deferred[unicode|None]: predecessor room id
+
+ Raises:
+ NotFoundError if the room is unknown
+ """
+ # Retrieve the room's create event
+ create_event = yield self.get_create_event_for_room(room_id)
+
+ # Return predecessor if present
+ return create_event.content.get("predecessor", None)
+
+ @defer.inlineCallbacks
+ def get_create_event_for_room(self, room_id):
+ """Get the create state event for a room.
+
+ Args:
+ room_id (str)
+
+ Returns:
+ Deferred[EventBase]: The room creation event.
+
+ Raises:
+ NotFoundError if the room is unknown
+ """
+ state_ids = yield self.get_current_state_ids(room_id)
+ create_id = state_ids.get((EventTypes.Create, ""))
+
+ # If we can't find the create event, assume we've hit a dead end
+ if not create_id:
+ raise NotFoundError("Unknown room %s" % (room_id))
+
+ # Retrieve the room's create event and return
+ create_event = yield self.get_event(create_id)
+ return create_event
+
+ @cached(max_entries=100000, iterable=True)
+ def get_current_state_ids(self, room_id):
+ """Get the current state event ids for a room based on the
+ current_state_events table.
+
+ Args:
+ room_id (str)
+
+ Returns:
+ deferred: dict of (type, state_key) -> event_id
+ """
+
+ def _get_current_state_ids_txn(txn):
+ txn.execute(
+ """SELECT type, state_key, event_id FROM current_state_events
+ WHERE room_id = ?
+ """,
+ (room_id,),
+ )
+
+ return {
+ (intern_string(r[0]), intern_string(r[1])): to_ascii(r[2]) for r in txn
+ }
+
+ return self.runInteraction("get_current_state_ids", _get_current_state_ids_txn)
+
+ # FIXME: how should this be cached?
+ def get_filtered_current_state_ids(self, room_id, state_filter=StateFilter.all()):
+ """Get the current state event of a given type for a room based on the
+ current_state_events table. This may not be as up-to-date as the result
+ of doing a fresh state resolution as per state_handler.get_current_state
+
+ Args:
+ room_id (str)
+ state_filter (StateFilter): The state filter used to fetch state
+ from the database.
+
+ Returns:
+ Deferred[dict[tuple[str, str], str]]: Map from type/state_key to
+ event ID.
+ """
+
+ where_clause, where_args = state_filter.make_sql_filter_clause()
+
+ if not where_clause:
+ # We delegate to the cached version
+ return self.get_current_state_ids(room_id)
+
+ def _get_filtered_current_state_ids_txn(txn):
+ results = {}
+ sql = """
+ SELECT type, state_key, event_id FROM current_state_events
+ WHERE room_id = ?
+ """
+
+ if where_clause:
+ sql += " AND (%s)" % (where_clause,)
+
+ args = [room_id]
+ args.extend(where_args)
+ txn.execute(sql, args)
+ for row in txn:
+ typ, state_key, event_id = row
+ key = (intern_string(typ), intern_string(state_key))
+ results[key] = event_id
+
+ return results
+
+ return self.runInteraction(
+ "get_filtered_current_state_ids", _get_filtered_current_state_ids_txn
+ )
+
+ @defer.inlineCallbacks
+ def get_canonical_alias_for_room(self, room_id):
+ """Get canonical alias for room, if any
+
+ Args:
+ room_id (str)
+
+ Returns:
+ Deferred[str|None]: The canonical alias, if any
+ """
+
+ state = yield self.get_filtered_current_state_ids(
+ room_id, StateFilter.from_types([(EventTypes.CanonicalAlias, "")])
+ )
+
+ event_id = state.get((EventTypes.CanonicalAlias, ""))
+ if not event_id:
+ return
+
+ event = yield self.get_event(event_id, allow_none=True)
+ if not event:
+ return
+
+ return event.content.get("canonical_alias")
+
+ @cached(max_entries=10000, iterable=True)
+ def get_state_group_delta(self, state_group):
+ """Given a state group try to return a previous group and a delta between
+ the old and the new.
+
+ Returns:
+ (prev_group, delta_ids), where both may be None.
+ """
+
+ def _get_state_group_delta_txn(txn):
+ prev_group = self._simple_select_one_onecol_txn(
+ txn,
+ table="state_group_edges",
+ keyvalues={"state_group": state_group},
+ retcol="prev_state_group",
+ allow_none=True,
+ )
+
+ if not prev_group:
+ return _GetStateGroupDelta(None, None)
+
+ delta_ids = self._simple_select_list_txn(
+ txn,
+ table="state_groups_state",
+ keyvalues={"state_group": state_group},
+ retcols=("type", "state_key", "event_id"),
+ )
+
+ return _GetStateGroupDelta(
+ prev_group,
+ {(row["type"], row["state_key"]): row["event_id"] for row in delta_ids},
+ )
+
+ return self.runInteraction("get_state_group_delta", _get_state_group_delta_txn)
+
+ @defer.inlineCallbacks
+ def get_state_groups_ids(self, _room_id, event_ids):
+ """Get the event IDs of all the state for the state groups for the given events
+
+ Args:
+ _room_id (str): id of the room for these events
+ event_ids (iterable[str]): ids of the events
+
+ Returns:
+ Deferred[dict[int, dict[tuple[str, str], str]]]:
+ dict of state_group_id -> (dict of (type, state_key) -> event id)
+ """
+ if not event_ids:
+ return {}
+
+ event_to_groups = yield self._get_state_group_for_events(event_ids)
+
+ groups = set(itervalues(event_to_groups))
+ group_to_state = yield self._get_state_for_groups(groups)
+
+ return group_to_state
+
+ @defer.inlineCallbacks
+ def get_state_ids_for_group(self, state_group):
+ """Get the event IDs of all the state in the given state group
+
+ Args:
+ state_group (int)
+
+ Returns:
+ Deferred[dict]: Resolves to a map of (type, state_key) -> event_id
+ """
+ group_to_state = yield self._get_state_for_groups((state_group,))
+
+ return group_to_state[state_group]
+
+ @defer.inlineCallbacks
+ def get_state_groups(self, room_id, event_ids):
+ """ Get the state groups for the given list of event_ids
+
+ Returns:
+ Deferred[dict[int, list[EventBase]]]:
+ dict of state_group_id -> list of state events.
+ """
+ if not event_ids:
+ return {}
+
+ group_to_ids = yield self.get_state_groups_ids(room_id, event_ids)
+
+ state_event_map = yield self.get_events(
+ [
+ ev_id
+ for group_ids in itervalues(group_to_ids)
+ for ev_id in itervalues(group_ids)
+ ],
+ get_prev_content=False,
+ )
+
+ return {
+ group: [
+ state_event_map[v]
+ for v in itervalues(event_id_map)
+ if v in state_event_map
+ ]
+ for group, event_id_map in iteritems(group_to_ids)
+ }
+
+ @defer.inlineCallbacks
+ def _get_state_groups_from_groups(self, groups, state_filter):
+ """Returns the state groups for a given set of groups, filtering on
+ types of state events.
+
+ Args:
+ groups(list[int]): list of state group IDs to query
+ state_filter (StateFilter): The state filter used to fetch state
+ from the database.
+ Returns:
+ Deferred[dict[int, dict[tuple[str, str], str]]]:
+ dict of state_group_id -> (dict of (type, state_key) -> event id)
+ """
+ results = {}
+
+ chunks = [groups[i : i + 100] for i in range(0, len(groups), 100)]
+ for chunk in chunks:
+ res = yield self.runInteraction(
+ "_get_state_groups_from_groups",
+ self._get_state_groups_from_groups_txn,
+ chunk,
+ state_filter,
+ )
+ results.update(res)
+
+ return results
+
+ @defer.inlineCallbacks
+ def get_state_for_events(self, event_ids, state_filter=StateFilter.all()):
+ """Given a list of event_ids and type tuples, return a list of state
+ dicts for each event.
+
+ Args:
+ event_ids (list[string])
+ state_filter (StateFilter): The state filter used to fetch state
+ from the database.
+
+ Returns:
+ deferred: A dict of (event_id) -> (type, state_key) -> [state_events]
+ """
+ event_to_groups = yield self._get_state_group_for_events(event_ids)
+
+ groups = set(itervalues(event_to_groups))
+ group_to_state = yield self._get_state_for_groups(groups, state_filter)
+
+ state_event_map = yield self.get_events(
+ [ev_id for sd in itervalues(group_to_state) for ev_id in itervalues(sd)],
+ get_prev_content=False,
+ )
+
+ event_to_state = {
+ event_id: {
+ k: state_event_map[v]
+ for k, v in iteritems(group_to_state[group])
+ if v in state_event_map
+ }
+ for event_id, group in iteritems(event_to_groups)
+ }
+
+ return {event: event_to_state[event] for event in event_ids}
+
+ @defer.inlineCallbacks
+ def get_state_ids_for_events(self, event_ids, state_filter=StateFilter.all()):
+ """
+ Get the state dicts corresponding to a list of events, containing the event_ids
+ of the state events (as opposed to the events themselves)
+
+ Args:
+ event_ids(list(str)): events whose state should be returned
+ state_filter (StateFilter): The state filter used to fetch state
+ from the database.
+
+ Returns:
+ A deferred dict from event_id -> (type, state_key) -> event_id
+ """
+ event_to_groups = yield self._get_state_group_for_events(event_ids)
+
+ groups = set(itervalues(event_to_groups))
+ group_to_state = yield self._get_state_for_groups(groups, state_filter)
+
+ event_to_state = {
+ event_id: group_to_state[group]
+ for event_id, group in iteritems(event_to_groups)
+ }
+
+ return {event: event_to_state[event] for event in event_ids}
+
+ @defer.inlineCallbacks
+ def get_state_for_event(self, event_id, state_filter=StateFilter.all()):
+ """
+ Get the state dict corresponding to a particular event
+
+ Args:
+ event_id(str): event whose state should be returned
+ state_filter (StateFilter): The state filter used to fetch state
+ from the database.
+
+ Returns:
+ A deferred dict from (type, state_key) -> state_event
+ """
+ state_map = yield self.get_state_for_events([event_id], state_filter)
+ return state_map[event_id]
+
+ @defer.inlineCallbacks
+ def get_state_ids_for_event(self, event_id, state_filter=StateFilter.all()):
+ """
+ Get the state dict corresponding to a particular event
+
+ Args:
+ event_id(str): event whose state should be returned
+ state_filter (StateFilter): The state filter used to fetch state
+ from the database.
+
+ Returns:
+ A deferred dict from (type, state_key) -> state_event
+ """
+ state_map = yield self.get_state_ids_for_events([event_id], state_filter)
+ return state_map[event_id]
+
+ @cached(max_entries=50000)
+ def _get_state_group_for_event(self, event_id):
+ return self._simple_select_one_onecol(
+ table="event_to_state_groups",
+ keyvalues={"event_id": event_id},
+ retcol="state_group",
+ allow_none=True,
+ desc="_get_state_group_for_event",
+ )
+
+ @cachedList(
+ cached_method_name="_get_state_group_for_event",
+ list_name="event_ids",
+ num_args=1,
+ inlineCallbacks=True,
+ )
+ def _get_state_group_for_events(self, event_ids):
+ """Returns mapping event_id -> state_group
+ """
+ rows = yield self._simple_select_many_batch(
+ table="event_to_state_groups",
+ column="event_id",
+ iterable=event_ids,
+ keyvalues={},
+ retcols=("event_id", "state_group"),
+ desc="_get_state_group_for_events",
+ )
+
+ return {row["event_id"]: row["state_group"] for row in rows}
+
+ def _get_state_for_group_using_cache(self, cache, group, state_filter):
+ """Checks if group is in cache. See `_get_state_for_groups`
+
+ Args:
+ cache(DictionaryCache): the state group cache to use
+ group(int): The state group to lookup
+ state_filter (StateFilter): The state filter used to fetch state
+ from the database.
+
+ Returns 2-tuple (`state_dict`, `got_all`).
+ `got_all` is a bool indicating if we successfully retrieved all
+ requests state from the cache, if False we need to query the DB for the
+ missing state.
+ """
+ is_all, known_absent, state_dict_ids = cache.get(group)
+
+ if is_all or state_filter.is_full():
+ # Either we have everything or want everything, either way
+ # `is_all` tells us whether we've gotten everything.
+ return state_filter.filter_state(state_dict_ids), is_all
+
+ # tracks whether any of our requested types are missing from the cache
+ missing_types = False
+
+ if state_filter.has_wildcards():
+ # We don't know if we fetched all the state keys for the types in
+ # the filter that are wildcards, so we have to assume that we may
+ # have missed some.
+ missing_types = True
+ else:
+ # There aren't any wild cards, so `concrete_types()` returns the
+ # complete list of event types we're wanting.
+ for key in state_filter.concrete_types():
+ if key not in state_dict_ids and key not in known_absent:
+ missing_types = True
+ break
+
+ return state_filter.filter_state(state_dict_ids), not missing_types
+
+ @defer.inlineCallbacks
+ def _get_state_for_groups(self, groups, state_filter=StateFilter.all()):
+ """Gets the state at each of a list of state groups, optionally
+ filtering by type/state_key
+
+ Args:
+ groups (iterable[int]): list of state groups for which we want
+ to get the state.
+ state_filter (StateFilter): The state filter used to fetch state
+ from the database.
+ Returns:
+ Deferred[dict[int, dict[tuple[str, str], str]]]:
+ dict of state_group_id -> (dict of (type, state_key) -> event id)
+ """
+
+ member_filter, non_member_filter = state_filter.get_member_split()
+
+ # Now we look them up in the member and non-member caches
+ non_member_state, incomplete_groups_nm, = (
+ yield self._get_state_for_groups_using_cache(
+ groups, self._state_group_cache, state_filter=non_member_filter
+ )
+ )
+
+ member_state, incomplete_groups_m, = (
+ yield self._get_state_for_groups_using_cache(
+ groups, self._state_group_members_cache, state_filter=member_filter
+ )
+ )
+
+ state = dict(non_member_state)
+ for group in groups:
+ state[group].update(member_state[group])
+
+ # Now fetch any missing groups from the database
+
+ incomplete_groups = incomplete_groups_m | incomplete_groups_nm
+
+ if not incomplete_groups:
+ return state
+
+ cache_sequence_nm = self._state_group_cache.sequence
+ cache_sequence_m = self._state_group_members_cache.sequence
+
+ # Help the cache hit ratio by expanding the filter a bit
+ db_state_filter = state_filter.return_expanded()
+
+ group_to_state_dict = yield self._get_state_groups_from_groups(
+ list(incomplete_groups), state_filter=db_state_filter
+ )
+
+ # Now lets update the caches
+ self._insert_into_cache(
+ group_to_state_dict,
+ db_state_filter,
+ cache_seq_num_members=cache_sequence_m,
+ cache_seq_num_non_members=cache_sequence_nm,
+ )
+
+ # And finally update the result dict, by filtering out any extra
+ # stuff we pulled out of the database.
+ for group, group_state_dict in iteritems(group_to_state_dict):
+ # We just replace any existing entries, as we will have loaded
+ # everything we need from the database anyway.
+ state[group] = state_filter.filter_state(group_state_dict)
+
+ return state
+
+ def _get_state_for_groups_using_cache(self, groups, cache, state_filter):
+ """Gets the state at each of a list of state groups, optionally
+ filtering by type/state_key, querying from a specific cache.
+
+ Args:
+ groups (iterable[int]): list of state groups for which we want
+ to get the state.
+ cache (DictionaryCache): the cache of group ids to state dicts which
+ we will pass through - either the normal state cache or the specific
+ members state cache.
+ state_filter (StateFilter): The state filter used to fetch state
+ from the database.
+
+ Returns:
+ tuple[dict[int, dict[tuple[str, str], str]], set[int]]: Tuple of
+ dict of state_group_id -> (dict of (type, state_key) -> event id)
+ of entries in the cache, and the state group ids either missing
+ from the cache or incomplete.
+ """
+ results = {}
+ incomplete_groups = set()
+ for group in set(groups):
+ state_dict_ids, got_all = self._get_state_for_group_using_cache(
+ cache, group, state_filter
+ )
+ results[group] = state_dict_ids
+
+ if not got_all:
+ incomplete_groups.add(group)
+
+ return results, incomplete_groups
+
+ def _insert_into_cache(
+ self,
+ group_to_state_dict,
+ state_filter,
+ cache_seq_num_members,
+ cache_seq_num_non_members,
+ ):
+ """Inserts results from querying the database into the relevant cache.
+
+ Args:
+ group_to_state_dict (dict): The new entries pulled from database.
+ Map from state group to state dict
+ state_filter (StateFilter): The state filter used to fetch state
+ from the database.
+ cache_seq_num_members (int): Sequence number of member cache since
+ last lookup in cache
+ cache_seq_num_non_members (int): Sequence number of member cache since
+ last lookup in cache
+ """
+
+ # We need to work out which types we've fetched from the DB for the
+ # member vs non-member caches. This should be as accurate as possible,
+ # but can be an underestimate (e.g. when we have wild cards)
+
+ member_filter, non_member_filter = state_filter.get_member_split()
+ if member_filter.is_full():
+ # We fetched all member events
+ member_types = None
+ else:
+ # `concrete_types()` will only return a subset when there are wild
+ # cards in the filter, but that's fine.
+ member_types = member_filter.concrete_types()
+
+ if non_member_filter.is_full():
+ # We fetched all non member events
+ non_member_types = None
+ else:
+ non_member_types = non_member_filter.concrete_types()
+
+ for group, group_state_dict in iteritems(group_to_state_dict):
+ state_dict_members = {}
+ state_dict_non_members = {}
+
+ for k, v in iteritems(group_state_dict):
+ if k[0] == EventTypes.Member:
+ state_dict_members[k] = v
+ else:
+ state_dict_non_members[k] = v
+
+ self._state_group_members_cache.update(
+ cache_seq_num_members,
+ key=group,
+ value=state_dict_members,
+ fetched_keys=member_types,
+ )
+
+ self._state_group_cache.update(
+ cache_seq_num_non_members,
+ key=group,
+ value=state_dict_non_members,
+ fetched_keys=non_member_types,
+ )
+
+ def store_state_group(
+ self, event_id, room_id, prev_group, delta_ids, current_state_ids
+ ):
+ """Store a new set of state, returning a newly assigned state group.
+
+ Args:
+ event_id (str): The event ID for which the state was calculated
+ room_id (str)
+ prev_group (int|None): A previous state group for the room, optional.
+ delta_ids (dict|None): The delta between state at `prev_group` and
+ `current_state_ids`, if `prev_group` was given. Same format as
+ `current_state_ids`.
+ current_state_ids (dict): The state to store. Map of (type, state_key)
+ to event_id.
+
+ Returns:
+ Deferred[int]: The state group ID
+ """
+
+ def _store_state_group_txn(txn):
+ if current_state_ids is None:
+ # AFAIK, this can never happen
+ raise Exception("current_state_ids cannot be None")
+
+ state_group = self.database_engine.get_next_state_group_id(txn)
+
+ self._simple_insert_txn(
+ txn,
+ table="state_groups",
+ values={"id": state_group, "room_id": room_id, "event_id": event_id},
+ )
+
+ # We persist as a delta if we can, while also ensuring the chain
+ # of deltas isn't tooo long, as otherwise read performance degrades.
+ if prev_group:
+ is_in_db = self._simple_select_one_onecol_txn(
+ txn,
+ table="state_groups",
+ keyvalues={"id": prev_group},
+ retcol="id",
+ allow_none=True,
+ )
+ if not is_in_db:
+ raise Exception(
+ "Trying to persist state with unpersisted prev_group: %r"
+ % (prev_group,)
+ )
+
+ potential_hops = self._count_state_group_hops_txn(txn, prev_group)
+ if prev_group and potential_hops < MAX_STATE_DELTA_HOPS:
+ self._simple_insert_txn(
+ txn,
+ table="state_group_edges",
+ values={"state_group": state_group, "prev_state_group": prev_group},
+ )
+
+ self._simple_insert_many_txn(
+ txn,
+ table="state_groups_state",
+ values=[
+ {
+ "state_group": state_group,
+ "room_id": room_id,
+ "type": key[0],
+ "state_key": key[1],
+ "event_id": state_id,
+ }
+ for key, state_id in iteritems(delta_ids)
+ ],
+ )
+ else:
+ self._simple_insert_many_txn(
+ txn,
+ table="state_groups_state",
+ values=[
+ {
+ "state_group": state_group,
+ "room_id": room_id,
+ "type": key[0],
+ "state_key": key[1],
+ "event_id": state_id,
+ }
+ for key, state_id in iteritems(current_state_ids)
+ ],
+ )
+
+ # Prefill the state group caches with this group.
+ # It's fine to use the sequence like this as the state group map
+ # is immutable. (If the map wasn't immutable then this prefill could
+ # race with another update)
+
+ current_member_state_ids = {
+ s: ev
+ for (s, ev) in iteritems(current_state_ids)
+ if s[0] == EventTypes.Member
+ }
+ txn.call_after(
+ self._state_group_members_cache.update,
+ self._state_group_members_cache.sequence,
+ key=state_group,
+ value=dict(current_member_state_ids),
+ )
+
+ current_non_member_state_ids = {
+ s: ev
+ for (s, ev) in iteritems(current_state_ids)
+ if s[0] != EventTypes.Member
+ }
+ txn.call_after(
+ self._state_group_cache.update,
+ self._state_group_cache.sequence,
+ key=state_group,
+ value=dict(current_non_member_state_ids),
+ )
+
+ return state_group
+
+ return self.runInteraction("store_state_group", _store_state_group_txn)
+
+
+class StateBackgroundUpdateStore(
+ StateGroupBackgroundUpdateStore, BackgroundUpdateStore
+):
+
+ STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication"
+ STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index"
+ CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx"
+ EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index"
+
+ def __init__(self, db_conn, hs):
+ super(StateBackgroundUpdateStore, self).__init__(db_conn, hs)
+ self.register_background_update_handler(
+ self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME,
+ self._background_deduplicate_state,
+ )
+ self.register_background_update_handler(
+ self.STATE_GROUP_INDEX_UPDATE_NAME, self._background_index_state
+ )
+ self.register_background_index_update(
+ self.CURRENT_STATE_INDEX_UPDATE_NAME,
+ index_name="current_state_events_member_index",
+ table="current_state_events",
+ columns=["state_key"],
+ where_clause="type='m.room.member'",
+ )
+ self.register_background_index_update(
+ self.EVENT_STATE_GROUP_INDEX_UPDATE_NAME,
+ index_name="event_to_state_groups_sg_index",
+ table="event_to_state_groups",
+ columns=["state_group"],
+ )
+
+ @defer.inlineCallbacks
+ def _background_deduplicate_state(self, progress, batch_size):
+ """This background update will slowly deduplicate state by reencoding
+ them as deltas.
+ """
+ last_state_group = progress.get("last_state_group", 0)
+ rows_inserted = progress.get("rows_inserted", 0)
+ max_group = progress.get("max_group", None)
+
+ BATCH_SIZE_SCALE_FACTOR = 100
+
+ batch_size = max(1, int(batch_size / BATCH_SIZE_SCALE_FACTOR))
+
+ if max_group is None:
+ rows = yield self._execute(
+ "_background_deduplicate_state",
+ None,
+ "SELECT coalesce(max(id), 0) FROM state_groups",
+ )
+ max_group = rows[0][0]
+
+ def reindex_txn(txn):
+ new_last_state_group = last_state_group
+ for count in range(batch_size):
+ txn.execute(
+ "SELECT id, room_id FROM state_groups"
+ " WHERE ? < id AND id <= ?"
+ " ORDER BY id ASC"
+ " LIMIT 1",
+ (new_last_state_group, max_group),
+ )
+ row = txn.fetchone()
+ if row:
+ state_group, room_id = row
+
+ if not row or not state_group:
+ return True, count
+
+ txn.execute(
+ "SELECT state_group FROM state_group_edges"
+ " WHERE state_group = ?",
+ (state_group,),
+ )
+
+ # If we reach a point where we've already started inserting
+ # edges we should stop.
+ if txn.fetchall():
+ return True, count
+
+ txn.execute(
+ "SELECT coalesce(max(id), 0) FROM state_groups"
+ " WHERE id < ? AND room_id = ?",
+ (state_group, room_id),
+ )
+ prev_group, = txn.fetchone()
+ new_last_state_group = state_group
+
+ if prev_group:
+ potential_hops = self._count_state_group_hops_txn(txn, prev_group)
+ if potential_hops >= MAX_STATE_DELTA_HOPS:
+ # We want to ensure chains are at most this long,#
+ # otherwise read performance degrades.
+ continue
+
+ prev_state = self._get_state_groups_from_groups_txn(
+ txn, [prev_group]
+ )
+ prev_state = prev_state[prev_group]
+
+ curr_state = self._get_state_groups_from_groups_txn(
+ txn, [state_group]
+ )
+ curr_state = curr_state[state_group]
+
+ if not set(prev_state.keys()) - set(curr_state.keys()):
+ # We can only do a delta if the current has a strict super set
+ # of keys
+
+ delta_state = {
+ key: value
+ for key, value in iteritems(curr_state)
+ if prev_state.get(key, None) != value
+ }
+
+ self._simple_delete_txn(
+ txn,
+ table="state_group_edges",
+ keyvalues={"state_group": state_group},
+ )
+
+ self._simple_insert_txn(
+ txn,
+ table="state_group_edges",
+ values={
+ "state_group": state_group,
+ "prev_state_group": prev_group,
+ },
+ )
+
+ self._simple_delete_txn(
+ txn,
+ table="state_groups_state",
+ keyvalues={"state_group": state_group},
+ )
+
+ self._simple_insert_many_txn(
+ txn,
+ table="state_groups_state",
+ values=[
+ {
+ "state_group": state_group,
+ "room_id": room_id,
+ "type": key[0],
+ "state_key": key[1],
+ "event_id": state_id,
+ }
+ for key, state_id in iteritems(delta_state)
+ ],
+ )
+
+ progress = {
+ "last_state_group": state_group,
+ "rows_inserted": rows_inserted + batch_size,
+ "max_group": max_group,
+ }
+
+ self._background_update_progress_txn(
+ txn, self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME, progress
+ )
+
+ return False, batch_size
+
+ finished, result = yield self.runInteraction(
+ self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME, reindex_txn
+ )
+
+ if finished:
+ yield self._end_background_update(
+ self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME
+ )
+
+ return result * BATCH_SIZE_SCALE_FACTOR
+
+ @defer.inlineCallbacks
+ def _background_index_state(self, progress, batch_size):
+ def reindex_txn(conn):
+ conn.rollback()
+ if isinstance(self.database_engine, PostgresEngine):
+ # postgres insists on autocommit for the index
+ conn.set_session(autocommit=True)
+ try:
+ txn = conn.cursor()
+ txn.execute(
+ "CREATE INDEX CONCURRENTLY state_groups_state_type_idx"
+ " ON state_groups_state(state_group, type, state_key)"
+ )
+ txn.execute("DROP INDEX IF EXISTS state_groups_state_id")
+ finally:
+ conn.set_session(autocommit=False)
+ else:
+ txn = conn.cursor()
+ txn.execute(
+ "CREATE INDEX state_groups_state_type_idx"
+ " ON state_groups_state(state_group, type, state_key)"
+ )
+ txn.execute("DROP INDEX IF EXISTS state_groups_state_id")
+
+ yield self.runWithConnection(reindex_txn)
+
+ yield self._end_background_update(self.STATE_GROUP_INDEX_UPDATE_NAME)
+
+ return 1
+
+
+class StateStore(StateGroupWorkerStore, StateBackgroundUpdateStore):
+ """ Keeps track of the state at a given event.
+
+ This is done by the concept of `state groups`. Every event is a assigned
+ a state group (identified by an arbitrary string), which references a
+ collection of state events. The current state of an event is then the
+ collection of state events referenced by the event's state group.
+
+ Hence, every change in the current state causes a new state group to be
+ generated. However, if no change happens (e.g., if we get a message event
+ with only one parent it inherits the state group from its parent.)
+
+ There are three tables:
+ * `state_groups`: Stores group name, first event with in the group and
+ room id.
+ * `event_to_state_groups`: Maps events to state groups.
+ * `state_groups_state`: Maps state group to state events.
+ """
+
+ def __init__(self, db_conn, hs):
+ super(StateStore, self).__init__(db_conn, hs)
+
+ def _store_event_state_mappings_txn(self, txn, events_and_contexts):
+ state_groups = {}
+ for event, context in events_and_contexts:
+ if event.internal_metadata.is_outlier():
+ continue
+
+ # if the event was rejected, just give it the same state as its
+ # predecessor.
+ if context.rejected:
+ state_groups[event.event_id] = context.prev_group
+ continue
+
+ state_groups[event.event_id] = context.state_group
+
+ self._simple_insert_many_txn(
+ txn,
+ table="event_to_state_groups",
+ values=[
+ {"state_group": state_group_id, "event_id": event_id}
+ for event_id, state_group_id in iteritems(state_groups)
+ ],
+ )
+
+ for event_id, state_group_id in iteritems(state_groups):
+ txn.call_after(
+ self._get_state_group_for_event.prefill, (event_id,), state_group_id
+ )
diff --git a/synapse/storage/state_deltas.py b/synapse/storage/data_stores/main/state_deltas.py
index 28f33ec18f..28f33ec18f 100644
--- a/synapse/storage/state_deltas.py
+++ b/synapse/storage/data_stores/main/state_deltas.py
diff --git a/synapse/storage/stats.py b/synapse/storage/data_stores/main/stats.py
index 7c224cd3d9..5ab639b2ad 100644
--- a/synapse/storage/stats.py
+++ b/synapse/storage/data_stores/main/stats.py
@@ -21,8 +21,8 @@ from twisted.internet import defer
from twisted.internet.defer import DeferredLock
from synapse.api.constants import EventTypes, Membership
-from synapse.storage import PostgresEngine
-from synapse.storage.state_deltas import StateDeltasStore
+from synapse.storage.data_stores.main.state_deltas import StateDeltasStore
+from synapse.storage.engines import PostgresEngine
from synapse.util.caches.descriptors import cached
logger = logging.getLogger(__name__)
diff --git a/synapse/storage/stream.py b/synapse/storage/data_stores/main/stream.py
index 490454f19a..263999dfca 100644
--- a/synapse/storage/stream.py
+++ b/synapse/storage/data_stores/main/stream.py
@@ -43,8 +43,8 @@ from twisted.internet import defer
from synapse.logging.context import make_deferred_yieldable, run_in_background
from synapse.storage._base import SQLBaseStore
+from synapse.storage.data_stores.main.events_worker import EventsWorkerStore
from synapse.storage.engines import PostgresEngine
-from synapse.storage.events_worker import EventsWorkerStore
from synapse.types import RoomStreamToken
from synapse.util.caches.stream_change_cache import StreamChangeCache
diff --git a/synapse/storage/tags.py b/synapse/storage/data_stores/main/tags.py
index 20dd6bd53d..10d1887f75 100644
--- a/synapse/storage/tags.py
+++ b/synapse/storage/data_stores/main/tags.py
@@ -22,7 +22,7 @@ from canonicaljson import json
from twisted.internet import defer
-from synapse.storage.account_data import AccountDataWorkerStore
+from synapse.storage.data_stores.main.account_data import AccountDataWorkerStore
from synapse.util.caches.descriptors import cached
logger = logging.getLogger(__name__)
diff --git a/synapse/storage/transactions.py b/synapse/storage/data_stores/main/transactions.py
index 289c117396..01b1be5e14 100644
--- a/synapse/storage/transactions.py
+++ b/synapse/storage/data_stores/main/transactions.py
@@ -23,10 +23,9 @@ from canonicaljson import encode_canonical_json
from twisted.internet import defer
from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.storage._base import SQLBaseStore, db_to_json
from synapse.util.caches.expiringcache import ExpiringCache
-from ._base import SQLBaseStore, db_to_json
-
# py2 sqlite has buffer hardcoded as only binary type, so we must use it,
# despite being deprecated and removed in favor of memoryview
if six.PY2:
diff --git a/synapse/storage/user_directory.py b/synapse/storage/data_stores/main/user_directory.py
index 1b1e4751b9..652abe0e6a 100644
--- a/synapse/storage/user_directory.py
+++ b/synapse/storage/data_stores/main/user_directory.py
@@ -20,9 +20,9 @@ from twisted.internet import defer
from synapse.api.constants import EventTypes, JoinRules
from synapse.storage.background_updates import BackgroundUpdateStore
+from synapse.storage.data_stores.main.state import StateFilter
+from synapse.storage.data_stores.main.state_deltas import StateDeltasStore
from synapse.storage.engines import PostgresEngine, Sqlite3Engine
-from synapse.storage.state import StateFilter
-from synapse.storage.state_deltas import StateDeltasStore
from synapse.types import get_domain_from_id, get_localpart_from_id
from synapse.util.caches.descriptors import cached
diff --git a/synapse/storage/user_erasure_store.py b/synapse/storage/data_stores/main/user_erasure_store.py
index aa4f0da5f0..aa4f0da5f0 100644
--- a/synapse/storage/user_erasure_store.py
+++ b/synapse/storage/data_stores/main/user_erasure_store.py
diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py
index e72f89e446..4769b21529 100644
--- a/synapse/storage/keys.py
+++ b/synapse/storage/keys.py
@@ -14,208 +14,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import itertools
import logging
-import six
-
import attr
-from signedjson.key import decode_verify_key_bytes
-
-from synapse.util import batch_iter
-from synapse.util.caches.descriptors import cached, cachedList
-
-from ._base import SQLBaseStore
logger = logging.getLogger(__name__)
-# py2 sqlite has buffer hardcoded as only binary type, so we must use it,
-# despite being deprecated and removed in favor of memoryview
-if six.PY2:
- db_binary_type = six.moves.builtins.buffer
-else:
- db_binary_type = memoryview
-
@attr.s(slots=True, frozen=True)
class FetchKeyResult(object):
verify_key = attr.ib() # VerifyKey: the key itself
valid_until_ts = attr.ib() # int: how long we can use this key for
-
-
-class KeyStore(SQLBaseStore):
- """Persistence for signature verification keys
- """
-
- @cached()
- def _get_server_verify_key(self, server_name_and_key_id):
- raise NotImplementedError()
-
- @cachedList(
- cached_method_name="_get_server_verify_key", list_name="server_name_and_key_ids"
- )
- def get_server_verify_keys(self, server_name_and_key_ids):
- """
- Args:
- server_name_and_key_ids (iterable[Tuple[str, str]]):
- iterable of (server_name, key-id) tuples to fetch keys for
-
- Returns:
- Deferred: resolves to dict[Tuple[str, str], FetchKeyResult|None]:
- map from (server_name, key_id) -> FetchKeyResult, or None if the key is
- unknown
- """
- keys = {}
-
- def _get_keys(txn, batch):
- """Processes a batch of keys to fetch, and adds the result to `keys`."""
-
- # batch_iter always returns tuples so it's safe to do len(batch)
- sql = (
- "SELECT server_name, key_id, verify_key, ts_valid_until_ms "
- "FROM server_signature_keys WHERE 1=0"
- ) + " OR (server_name=? AND key_id=?)" * len(batch)
-
- txn.execute(sql, tuple(itertools.chain.from_iterable(batch)))
-
- for row in txn:
- server_name, key_id, key_bytes, ts_valid_until_ms = row
-
- if ts_valid_until_ms is None:
- # Old keys may be stored with a ts_valid_until_ms of null,
- # in which case we treat this as if it was set to `0`, i.e.
- # it won't match key requests that define a minimum
- # `ts_valid_until_ms`.
- ts_valid_until_ms = 0
-
- res = FetchKeyResult(
- verify_key=decode_verify_key_bytes(key_id, bytes(key_bytes)),
- valid_until_ts=ts_valid_until_ms,
- )
- keys[(server_name, key_id)] = res
-
- def _txn(txn):
- for batch in batch_iter(server_name_and_key_ids, 50):
- _get_keys(txn, batch)
- return keys
-
- return self.runInteraction("get_server_verify_keys", _txn)
-
- def store_server_verify_keys(self, from_server, ts_added_ms, verify_keys):
- """Stores NACL verification keys for remote servers.
- Args:
- from_server (str): Where the verification keys were looked up
- ts_added_ms (int): The time to record that the key was added
- verify_keys (iterable[tuple[str, str, FetchKeyResult]]):
- keys to be stored. Each entry is a triplet of
- (server_name, key_id, key).
- """
- key_values = []
- value_values = []
- invalidations = []
- for server_name, key_id, fetch_result in verify_keys:
- key_values.append((server_name, key_id))
- value_values.append(
- (
- from_server,
- ts_added_ms,
- fetch_result.valid_until_ts,
- db_binary_type(fetch_result.verify_key.encode()),
- )
- )
- # invalidate takes a tuple corresponding to the params of
- # _get_server_verify_key. _get_server_verify_key only takes one
- # param, which is itself the 2-tuple (server_name, key_id).
- invalidations.append((server_name, key_id))
-
- def _invalidate(res):
- f = self._get_server_verify_key.invalidate
- for i in invalidations:
- f((i,))
- return res
-
- return self.runInteraction(
- "store_server_verify_keys",
- self._simple_upsert_many_txn,
- table="server_signature_keys",
- key_names=("server_name", "key_id"),
- key_values=key_values,
- value_names=(
- "from_server",
- "ts_added_ms",
- "ts_valid_until_ms",
- "verify_key",
- ),
- value_values=value_values,
- ).addCallback(_invalidate)
-
- def store_server_keys_json(
- self, server_name, key_id, from_server, ts_now_ms, ts_expires_ms, key_json_bytes
- ):
- """Stores the JSON bytes for a set of keys from a server
- The JSON should be signed by the originating server, the intermediate
- server, and by this server. Updates the value for the
- (server_name, key_id, from_server) triplet if one already existed.
- Args:
- server_name (str): The name of the server.
- key_id (str): The identifer of the key this JSON is for.
- from_server (str): The server this JSON was fetched from.
- ts_now_ms (int): The time now in milliseconds.
- ts_valid_until_ms (int): The time when this json stops being valid.
- key_json (bytes): The encoded JSON.
- """
- return self._simple_upsert(
- table="server_keys_json",
- keyvalues={
- "server_name": server_name,
- "key_id": key_id,
- "from_server": from_server,
- },
- values={
- "server_name": server_name,
- "key_id": key_id,
- "from_server": from_server,
- "ts_added_ms": ts_now_ms,
- "ts_valid_until_ms": ts_expires_ms,
- "key_json": db_binary_type(key_json_bytes),
- },
- desc="store_server_keys_json",
- )
-
- def get_server_keys_json(self, server_keys):
- """Retrive the key json for a list of server_keys and key ids.
- If no keys are found for a given server, key_id and source then
- that server, key_id, and source triplet entry will be an empty list.
- The JSON is returned as a byte array so that it can be efficiently
- used in an HTTP response.
- Args:
- server_keys (list): List of (server_name, key_id, source) triplets.
- Returns:
- Deferred[dict[Tuple[str, str, str|None], list[dict]]]:
- Dict mapping (server_name, key_id, source) triplets to lists of dicts
- """
-
- def _get_server_keys_json_txn(txn):
- results = {}
- for server_name, key_id, from_server in server_keys:
- keyvalues = {"server_name": server_name}
- if key_id is not None:
- keyvalues["key_id"] = key_id
- if from_server is not None:
- keyvalues["from_server"] = from_server
- rows = self._simple_select_list_txn(
- txn,
- "server_keys_json",
- keyvalues=keyvalues,
- retcols=(
- "key_id",
- "from_server",
- "ts_added_ms",
- "ts_valid_until_ms",
- "key_json",
- ),
- )
- results[(server_name, key_id, from_server)] = rows
- return results
-
- return self.runInteraction("get_server_keys_json", _get_server_keys_json_txn)
diff --git a/synapse/storage/presence.py b/synapse/storage/presence.py
index 3a641f538b..18a462f0ee 100644
--- a/synapse/storage/presence.py
+++ b/synapse/storage/presence.py
@@ -15,12 +15,7 @@
from collections import namedtuple
-from twisted.internet import defer
-
from synapse.api.constants import PresenceState
-from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause
-from synapse.util import batch_iter
-from synapse.util.caches.descriptors import cached, cachedList
class UserPresenceState(
@@ -72,132 +67,3 @@ class UserPresenceState(
status_msg=None,
currently_active=False,
)
-
-
-class PresenceStore(SQLBaseStore):
- @defer.inlineCallbacks
- def update_presence(self, presence_states):
- stream_ordering_manager = self._presence_id_gen.get_next_mult(
- len(presence_states)
- )
-
- with stream_ordering_manager as stream_orderings:
- yield self.runInteraction(
- "update_presence",
- self._update_presence_txn,
- stream_orderings,
- presence_states,
- )
-
- return stream_orderings[-1], self._presence_id_gen.get_current_token()
-
- def _update_presence_txn(self, txn, stream_orderings, presence_states):
- for stream_id, state in zip(stream_orderings, presence_states):
- txn.call_after(
- self.presence_stream_cache.entity_has_changed, state.user_id, stream_id
- )
- txn.call_after(self._get_presence_for_user.invalidate, (state.user_id,))
-
- # Actually insert new rows
- self._simple_insert_many_txn(
- txn,
- table="presence_stream",
- values=[
- {
- "stream_id": stream_id,
- "user_id": state.user_id,
- "state": state.state,
- "last_active_ts": state.last_active_ts,
- "last_federation_update_ts": state.last_federation_update_ts,
- "last_user_sync_ts": state.last_user_sync_ts,
- "status_msg": state.status_msg,
- "currently_active": state.currently_active,
- }
- for state in presence_states
- ],
- )
-
- # Delete old rows to stop database from getting really big
- sql = "DELETE FROM presence_stream WHERE stream_id < ? AND "
-
- for states in batch_iter(presence_states, 50):
- clause, args = make_in_list_sql_clause(
- self.database_engine, "user_id", [s.user_id for s in states]
- )
- txn.execute(sql + clause, [stream_id] + list(args))
-
- def get_all_presence_updates(self, last_id, current_id):
- if last_id == current_id:
- return defer.succeed([])
-
- def get_all_presence_updates_txn(txn):
- sql = (
- "SELECT stream_id, user_id, state, last_active_ts,"
- " last_federation_update_ts, last_user_sync_ts, status_msg,"
- " currently_active"
- " FROM presence_stream"
- " WHERE ? < stream_id AND stream_id <= ?"
- )
- txn.execute(sql, (last_id, current_id))
- return txn.fetchall()
-
- return self.runInteraction(
- "get_all_presence_updates", get_all_presence_updates_txn
- )
-
- @cached()
- def _get_presence_for_user(self, user_id):
- raise NotImplementedError()
-
- @cachedList(
- cached_method_name="_get_presence_for_user",
- list_name="user_ids",
- num_args=1,
- inlineCallbacks=True,
- )
- def get_presence_for_users(self, user_ids):
- rows = yield self._simple_select_many_batch(
- table="presence_stream",
- column="user_id",
- iterable=user_ids,
- keyvalues={},
- retcols=(
- "user_id",
- "state",
- "last_active_ts",
- "last_federation_update_ts",
- "last_user_sync_ts",
- "status_msg",
- "currently_active",
- ),
- desc="get_presence_for_users",
- )
-
- for row in rows:
- row["currently_active"] = bool(row["currently_active"])
-
- return {row["user_id"]: UserPresenceState(**row) for row in rows}
-
- def get_current_presence_token(self):
- return self._presence_id_gen.get_current_token()
-
- def allow_presence_visible(self, observed_localpart, observer_userid):
- return self._simple_insert(
- table="presence_allow_inbound",
- values={
- "observed_user_id": observed_localpart,
- "observer_user_id": observer_userid,
- },
- desc="allow_presence_visible",
- or_ignore=True,
- )
-
- def disallow_presence_visible(self, observed_localpart, observer_userid):
- return self._simple_delete_one(
- table="presence_allow_inbound",
- keyvalues={
- "observed_user_id": observed_localpart,
- "observer_user_id": observer_userid,
- },
- desc="disallow_presence_visible",
- )
diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py
index c4e24edff2..f47cec0d86 100644
--- a/synapse/storage/push_rule.py
+++ b/synapse/storage/push_rule.py
@@ -14,704 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import abc
-import logging
-
-from canonicaljson import json
-
-from twisted.internet import defer
-
-from synapse.push.baserules import list_with_base_rules
-from synapse.storage.appservice import ApplicationServiceWorkerStore
-from synapse.storage.pusher import PusherWorkerStore
-from synapse.storage.receipts import ReceiptsWorkerStore
-from synapse.storage.roommember import RoomMemberWorkerStore
-from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList
-from synapse.util.caches.stream_change_cache import StreamChangeCache
-
-from ._base import SQLBaseStore
-
-logger = logging.getLogger(__name__)
-
-
-def _load_rules(rawrules, enabled_map):
- ruleslist = []
- for rawrule in rawrules:
- rule = dict(rawrule)
- rule["conditions"] = json.loads(rawrule["conditions"])
- rule["actions"] = json.loads(rawrule["actions"])
- ruleslist.append(rule)
-
- # We're going to be mutating this a lot, so do a deep copy
- rules = list(list_with_base_rules(ruleslist))
-
- for i, rule in enumerate(rules):
- rule_id = rule["rule_id"]
- if rule_id in enabled_map:
- if rule.get("enabled", True) != bool(enabled_map[rule_id]):
- # Rules are cached across users.
- rule = dict(rule)
- rule["enabled"] = bool(enabled_map[rule_id])
- rules[i] = rule
-
- return rules
-
-
-class PushRulesWorkerStore(
- ApplicationServiceWorkerStore,
- ReceiptsWorkerStore,
- PusherWorkerStore,
- RoomMemberWorkerStore,
- SQLBaseStore,
-):
- """This is an abstract base class where subclasses must implement
- `get_max_push_rules_stream_id` which can be called in the initializer.
- """
-
- # This ABCMeta metaclass ensures that we cannot be instantiated without
- # the abstract methods being implemented.
- __metaclass__ = abc.ABCMeta
-
- def __init__(self, db_conn, hs):
- super(PushRulesWorkerStore, self).__init__(db_conn, hs)
-
- push_rules_prefill, push_rules_id = self._get_cache_dict(
- db_conn,
- "push_rules_stream",
- entity_column="user_id",
- stream_column="stream_id",
- max_value=self.get_max_push_rules_stream_id(),
- )
-
- self.push_rules_stream_cache = StreamChangeCache(
- "PushRulesStreamChangeCache",
- push_rules_id,
- prefilled_cache=push_rules_prefill,
- )
-
- @abc.abstractmethod
- def get_max_push_rules_stream_id(self):
- """Get the position of the push rules stream.
-
- Returns:
- int
- """
- raise NotImplementedError()
-
- @cachedInlineCallbacks(max_entries=5000)
- def get_push_rules_for_user(self, user_id):
- rows = yield self._simple_select_list(
- table="push_rules",
- keyvalues={"user_name": user_id},
- retcols=(
- "user_name",
- "rule_id",
- "priority_class",
- "priority",
- "conditions",
- "actions",
- ),
- desc="get_push_rules_enabled_for_user",
- )
-
- rows.sort(key=lambda row: (-int(row["priority_class"]), -int(row["priority"])))
-
- enabled_map = yield self.get_push_rules_enabled_for_user(user_id)
-
- rules = _load_rules(rows, enabled_map)
-
- return rules
-
- @cachedInlineCallbacks(max_entries=5000)
- def get_push_rules_enabled_for_user(self, user_id):
- results = yield self._simple_select_list(
- table="push_rules_enable",
- keyvalues={"user_name": user_id},
- retcols=("user_name", "rule_id", "enabled"),
- desc="get_push_rules_enabled_for_user",
- )
- return {r["rule_id"]: False if r["enabled"] == 0 else True for r in results}
-
- def have_push_rules_changed_for_user(self, user_id, last_id):
- if not self.push_rules_stream_cache.has_entity_changed(user_id, last_id):
- return defer.succeed(False)
- else:
-
- def have_push_rules_changed_txn(txn):
- sql = (
- "SELECT COUNT(stream_id) FROM push_rules_stream"
- " WHERE user_id = ? AND ? < stream_id"
- )
- txn.execute(sql, (user_id, last_id))
- count, = txn.fetchone()
- return bool(count)
-
- return self.runInteraction(
- "have_push_rules_changed", have_push_rules_changed_txn
- )
-
- @cachedList(
- cached_method_name="get_push_rules_for_user",
- list_name="user_ids",
- num_args=1,
- inlineCallbacks=True,
- )
- def bulk_get_push_rules(self, user_ids):
- if not user_ids:
- return {}
-
- results = {user_id: [] for user_id in user_ids}
-
- rows = yield self._simple_select_many_batch(
- table="push_rules",
- column="user_name",
- iterable=user_ids,
- retcols=("*",),
- desc="bulk_get_push_rules",
- )
-
- rows.sort(key=lambda row: (-int(row["priority_class"]), -int(row["priority"])))
-
- for row in rows:
- results.setdefault(row["user_name"], []).append(row)
-
- enabled_map_by_user = yield self.bulk_get_push_rules_enabled(user_ids)
-
- for user_id, rules in results.items():
- results[user_id] = _load_rules(rules, enabled_map_by_user.get(user_id, {}))
-
- return results
-
- @defer.inlineCallbacks
- def copy_push_rule_from_room_to_room(self, new_room_id, user_id, rule):
- """Copy a single push rule from one room to another for a specific user.
-
- Args:
- new_room_id (str): ID of the new room.
- user_id (str): ID of user the push rule belongs to.
- rule (Dict): A push rule.
- """
- # Create new rule id
- rule_id_scope = "/".join(rule["rule_id"].split("/")[:-1])
- new_rule_id = rule_id_scope + "/" + new_room_id
-
- # Change room id in each condition
- for condition in rule.get("conditions", []):
- if condition.get("key") == "room_id":
- condition["pattern"] = new_room_id
-
- # Add the rule for the new room
- yield self.add_push_rule(
- user_id=user_id,
- rule_id=new_rule_id,
- priority_class=rule["priority_class"],
- conditions=rule["conditions"],
- actions=rule["actions"],
- )
-
- @defer.inlineCallbacks
- def copy_push_rules_from_room_to_room_for_user(
- self, old_room_id, new_room_id, user_id
- ):
- """Copy all of the push rules from one room to another for a specific
- user.
-
- Args:
- old_room_id (str): ID of the old room.
- new_room_id (str): ID of the new room.
- user_id (str): ID of user to copy push rules for.
- """
- # Retrieve push rules for this user
- user_push_rules = yield self.get_push_rules_for_user(user_id)
-
- # Get rules relating to the old room and copy them to the new room
- for rule in user_push_rules:
- conditions = rule.get("conditions", [])
- if any(
- (c.get("key") == "room_id" and c.get("pattern") == old_room_id)
- for c in conditions
- ):
- yield self.copy_push_rule_from_room_to_room(new_room_id, user_id, rule)
-
- @defer.inlineCallbacks
- def bulk_get_push_rules_for_room(self, event, context):
- state_group = context.state_group
- if not state_group:
- # If state_group is None it means it has yet to be assigned a
- # state group, i.e. we need to make sure that calls with a state_group
- # of None don't hit previous cached calls with a None state_group.
- # To do this we set the state_group to a new object as object() != object()
- state_group = object()
-
- current_state_ids = yield context.get_current_state_ids(self)
- result = yield self._bulk_get_push_rules_for_room(
- event.room_id, state_group, current_state_ids, event=event
- )
- return result
-
- @cachedInlineCallbacks(num_args=2, cache_context=True)
- def _bulk_get_push_rules_for_room(
- self, room_id, state_group, current_state_ids, cache_context, event=None
- ):
- # We don't use `state_group`, its there so that we can cache based
- # on it. However, its important that its never None, since two current_state's
- # with a state_group of None are likely to be different.
- # See bulk_get_push_rules_for_room for how we work around this.
- assert state_group is not None
-
- # We also will want to generate notifs for other people in the room so
- # their unread countss are correct in the event stream, but to avoid
- # generating them for bot / AS users etc, we only do so for people who've
- # sent a read receipt into the room.
-
- users_in_room = yield self._get_joined_users_from_context(
- room_id,
- state_group,
- current_state_ids,
- on_invalidate=cache_context.invalidate,
- event=event,
- )
-
- # We ignore app service users for now. This is so that we don't fill
- # up the `get_if_users_have_pushers` cache with AS entries that we
- # know don't have pushers, nor even read receipts.
- local_users_in_room = set(
- u
- for u in users_in_room
- if self.hs.is_mine_id(u)
- and not self.get_if_app_services_interested_in_user(u)
- )
-
- # users in the room who have pushers need to get push rules run because
- # that's how their pushers work
- if_users_with_pushers = yield self.get_if_users_have_pushers(
- local_users_in_room, on_invalidate=cache_context.invalidate
- )
- user_ids = set(
- uid for uid, have_pusher in if_users_with_pushers.items() if have_pusher
- )
-
- users_with_receipts = yield self.get_users_with_read_receipts_in_room(
- room_id, on_invalidate=cache_context.invalidate
- )
-
- # any users with pushers must be ours: they have pushers
- for uid in users_with_receipts:
- if uid in local_users_in_room:
- user_ids.add(uid)
-
- rules_by_user = yield self.bulk_get_push_rules(
- user_ids, on_invalidate=cache_context.invalidate
- )
-
- rules_by_user = {k: v for k, v in rules_by_user.items() if v is not None}
-
- return rules_by_user
-
- @cachedList(
- cached_method_name="get_push_rules_enabled_for_user",
- list_name="user_ids",
- num_args=1,
- inlineCallbacks=True,
- )
- def bulk_get_push_rules_enabled(self, user_ids):
- if not user_ids:
- return {}
-
- results = {user_id: {} for user_id in user_ids}
-
- rows = yield self._simple_select_many_batch(
- table="push_rules_enable",
- column="user_name",
- iterable=user_ids,
- retcols=("user_name", "rule_id", "enabled"),
- desc="bulk_get_push_rules_enabled",
- )
- for row in rows:
- enabled = bool(row["enabled"])
- results.setdefault(row["user_name"], {})[row["rule_id"]] = enabled
- return results
-
-
-class PushRuleStore(PushRulesWorkerStore):
- @defer.inlineCallbacks
- def add_push_rule(
- self,
- user_id,
- rule_id,
- priority_class,
- conditions,
- actions,
- before=None,
- after=None,
- ):
- conditions_json = json.dumps(conditions)
- actions_json = json.dumps(actions)
- with self._push_rules_stream_id_gen.get_next() as ids:
- stream_id, event_stream_ordering = ids
- if before or after:
- yield self.runInteraction(
- "_add_push_rule_relative_txn",
- self._add_push_rule_relative_txn,
- stream_id,
- event_stream_ordering,
- user_id,
- rule_id,
- priority_class,
- conditions_json,
- actions_json,
- before,
- after,
- )
- else:
- yield self.runInteraction(
- "_add_push_rule_highest_priority_txn",
- self._add_push_rule_highest_priority_txn,
- stream_id,
- event_stream_ordering,
- user_id,
- rule_id,
- priority_class,
- conditions_json,
- actions_json,
- )
-
- def _add_push_rule_relative_txn(
- self,
- txn,
- stream_id,
- event_stream_ordering,
- user_id,
- rule_id,
- priority_class,
- conditions_json,
- actions_json,
- before,
- after,
- ):
- # Lock the table since otherwise we'll have annoying races between the
- # SELECT here and the UPSERT below.
- self.database_engine.lock_table(txn, "push_rules")
-
- relative_to_rule = before or after
-
- res = self._simple_select_one_txn(
- txn,
- table="push_rules",
- keyvalues={"user_name": user_id, "rule_id": relative_to_rule},
- retcols=["priority_class", "priority"],
- allow_none=True,
- )
-
- if not res:
- raise RuleNotFoundException(
- "before/after rule not found: %s" % (relative_to_rule,)
- )
-
- base_priority_class = res["priority_class"]
- base_rule_priority = res["priority"]
-
- if base_priority_class != priority_class:
- raise InconsistentRuleException(
- "Given priority class does not match class of relative rule"
- )
-
- if before:
- # Higher priority rules are executed first, So adding a rule before
- # a rule means giving it a higher priority than that rule.
- new_rule_priority = base_rule_priority + 1
- else:
- # We increment the priority of the existing rules to make space for
- # the new rule. Therefore if we want this rule to appear after
- # an existing rule we give it the priority of the existing rule,
- # and then increment the priority of the existing rule.
- new_rule_priority = base_rule_priority
-
- sql = (
- "UPDATE push_rules SET priority = priority + 1"
- " WHERE user_name = ? AND priority_class = ? AND priority >= ?"
- )
-
- txn.execute(sql, (user_id, priority_class, new_rule_priority))
-
- self._upsert_push_rule_txn(
- txn,
- stream_id,
- event_stream_ordering,
- user_id,
- rule_id,
- priority_class,
- new_rule_priority,
- conditions_json,
- actions_json,
- )
-
- def _add_push_rule_highest_priority_txn(
- self,
- txn,
- stream_id,
- event_stream_ordering,
- user_id,
- rule_id,
- priority_class,
- conditions_json,
- actions_json,
- ):
- # Lock the table since otherwise we'll have annoying races between the
- # SELECT here and the UPSERT below.
- self.database_engine.lock_table(txn, "push_rules")
-
- # find the highest priority rule in that class
- sql = (
- "SELECT COUNT(*), MAX(priority) FROM push_rules"
- " WHERE user_name = ? and priority_class = ?"
- )
- txn.execute(sql, (user_id, priority_class))
- res = txn.fetchall()
- (how_many, highest_prio) = res[0]
-
- new_prio = 0
- if how_many > 0:
- new_prio = highest_prio + 1
-
- self._upsert_push_rule_txn(
- txn,
- stream_id,
- event_stream_ordering,
- user_id,
- rule_id,
- priority_class,
- new_prio,
- conditions_json,
- actions_json,
- )
-
- def _upsert_push_rule_txn(
- self,
- txn,
- stream_id,
- event_stream_ordering,
- user_id,
- rule_id,
- priority_class,
- priority,
- conditions_json,
- actions_json,
- update_stream=True,
- ):
- """Specialised version of _simple_upsert_txn that picks a push_rule_id
- using the _push_rule_id_gen if it needs to insert the rule. It assumes
- that the "push_rules" table is locked"""
-
- sql = (
- "UPDATE push_rules"
- " SET priority_class = ?, priority = ?, conditions = ?, actions = ?"
- " WHERE user_name = ? AND rule_id = ?"
- )
-
- txn.execute(
- sql,
- (priority_class, priority, conditions_json, actions_json, user_id, rule_id),
- )
-
- if txn.rowcount == 0:
- # We didn't update a row with the given rule_id so insert one
- push_rule_id = self._push_rule_id_gen.get_next()
-
- self._simple_insert_txn(
- txn,
- table="push_rules",
- values={
- "id": push_rule_id,
- "user_name": user_id,
- "rule_id": rule_id,
- "priority_class": priority_class,
- "priority": priority,
- "conditions": conditions_json,
- "actions": actions_json,
- },
- )
-
- if update_stream:
- self._insert_push_rules_update_txn(
- txn,
- stream_id,
- event_stream_ordering,
- user_id,
- rule_id,
- op="ADD",
- data={
- "priority_class": priority_class,
- "priority": priority,
- "conditions": conditions_json,
- "actions": actions_json,
- },
- )
-
- @defer.inlineCallbacks
- def delete_push_rule(self, user_id, rule_id):
- """
- Delete a push rule. Args specify the row to be deleted and can be
- any of the columns in the push_rule table, but below are the
- standard ones
-
- Args:
- user_id (str): The matrix ID of the push rule owner
- rule_id (str): The rule_id of the rule to be deleted
- """
-
- def delete_push_rule_txn(txn, stream_id, event_stream_ordering):
- self._simple_delete_one_txn(
- txn, "push_rules", {"user_name": user_id, "rule_id": rule_id}
- )
-
- self._insert_push_rules_update_txn(
- txn, stream_id, event_stream_ordering, user_id, rule_id, op="DELETE"
- )
-
- with self._push_rules_stream_id_gen.get_next() as ids:
- stream_id, event_stream_ordering = ids
- yield self.runInteraction(
- "delete_push_rule",
- delete_push_rule_txn,
- stream_id,
- event_stream_ordering,
- )
-
- @defer.inlineCallbacks
- def set_push_rule_enabled(self, user_id, rule_id, enabled):
- with self._push_rules_stream_id_gen.get_next() as ids:
- stream_id, event_stream_ordering = ids
- yield self.runInteraction(
- "_set_push_rule_enabled_txn",
- self._set_push_rule_enabled_txn,
- stream_id,
- event_stream_ordering,
- user_id,
- rule_id,
- enabled,
- )
-
- def _set_push_rule_enabled_txn(
- self, txn, stream_id, event_stream_ordering, user_id, rule_id, enabled
- ):
- new_id = self._push_rules_enable_id_gen.get_next()
- self._simple_upsert_txn(
- txn,
- "push_rules_enable",
- {"user_name": user_id, "rule_id": rule_id},
- {"enabled": 1 if enabled else 0},
- {"id": new_id},
- )
-
- self._insert_push_rules_update_txn(
- txn,
- stream_id,
- event_stream_ordering,
- user_id,
- rule_id,
- op="ENABLE" if enabled else "DISABLE",
- )
-
- @defer.inlineCallbacks
- def set_push_rule_actions(self, user_id, rule_id, actions, is_default_rule):
- actions_json = json.dumps(actions)
-
- def set_push_rule_actions_txn(txn, stream_id, event_stream_ordering):
- if is_default_rule:
- # Add a dummy rule to the rules table with the user specified
- # actions.
- priority_class = -1
- priority = 1
- self._upsert_push_rule_txn(
- txn,
- stream_id,
- event_stream_ordering,
- user_id,
- rule_id,
- priority_class,
- priority,
- "[]",
- actions_json,
- update_stream=False,
- )
- else:
- self._simple_update_one_txn(
- txn,
- "push_rules",
- {"user_name": user_id, "rule_id": rule_id},
- {"actions": actions_json},
- )
-
- self._insert_push_rules_update_txn(
- txn,
- stream_id,
- event_stream_ordering,
- user_id,
- rule_id,
- op="ACTIONS",
- data={"actions": actions_json},
- )
-
- with self._push_rules_stream_id_gen.get_next() as ids:
- stream_id, event_stream_ordering = ids
- yield self.runInteraction(
- "set_push_rule_actions",
- set_push_rule_actions_txn,
- stream_id,
- event_stream_ordering,
- )
-
- def _insert_push_rules_update_txn(
- self, txn, stream_id, event_stream_ordering, user_id, rule_id, op, data=None
- ):
- values = {
- "stream_id": stream_id,
- "event_stream_ordering": event_stream_ordering,
- "user_id": user_id,
- "rule_id": rule_id,
- "op": op,
- }
- if data is not None:
- values.update(data)
-
- self._simple_insert_txn(txn, "push_rules_stream", values=values)
-
- txn.call_after(self.get_push_rules_for_user.invalidate, (user_id,))
- txn.call_after(self.get_push_rules_enabled_for_user.invalidate, (user_id,))
- txn.call_after(
- self.push_rules_stream_cache.entity_has_changed, user_id, stream_id
- )
-
- def get_all_push_rule_updates(self, last_id, current_id, limit):
- """Get all the push rules changes that have happend on the server"""
- if last_id == current_id:
- return defer.succeed([])
-
- def get_all_push_rule_updates_txn(txn):
- sql = (
- "SELECT stream_id, event_stream_ordering, user_id, rule_id,"
- " op, priority_class, priority, conditions, actions"
- " FROM push_rules_stream"
- " WHERE ? < stream_id AND stream_id <= ?"
- " ORDER BY stream_id ASC LIMIT ?"
- )
- txn.execute(sql, (last_id, current_id, limit))
- return txn.fetchall()
-
- return self.runInteraction(
- "get_all_push_rule_updates", get_all_push_rule_updates_txn
- )
-
- def get_push_rules_stream_token(self):
- """Get the position of the push rules stream.
- Returns a pair of a stream id for the push_rules stream and the
- room stream ordering it corresponds to."""
- return self._push_rules_stream_id_gen.get_current_token()
-
- def get_max_push_rules_stream_id(self):
- return self.get_push_rules_stream_token()[0]
-
class RuleNotFoundException(Exception):
pass
diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py
index fcb5f2f23a..d471ec9860 100644
--- a/synapse/storage/relations.py
+++ b/synapse/storage/relations.py
@@ -17,11 +17,7 @@ import logging
import attr
-from synapse.api.constants import RelationTypes
from synapse.api.errors import SynapseError
-from synapse.storage._base import SQLBaseStore
-from synapse.storage.stream import generate_pagination_where_clause
-from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
logger = logging.getLogger(__name__)
@@ -113,358 +109,3 @@ class AggregationPaginationToken(object):
def as_tuple(self):
return attr.astuple(self)
-
-
-class RelationsWorkerStore(SQLBaseStore):
- @cached(tree=True)
- def get_relations_for_event(
- self,
- event_id,
- relation_type=None,
- event_type=None,
- aggregation_key=None,
- limit=5,
- direction="b",
- from_token=None,
- to_token=None,
- ):
- """Get a list of relations for an event, ordered by topological ordering.
-
- Args:
- event_id (str): Fetch events that relate to this event ID.
- relation_type (str|None): Only fetch events with this relation
- type, if given.
- event_type (str|None): Only fetch events with this event type, if
- given.
- aggregation_key (str|None): Only fetch events with this aggregation
- key, if given.
- limit (int): Only fetch the most recent `limit` events.
- direction (str): Whether to fetch the most recent first (`"b"`) or
- the oldest first (`"f"`).
- from_token (RelationPaginationToken|None): Fetch rows from the given
- token, or from the start if None.
- to_token (RelationPaginationToken|None): Fetch rows up to the given
- token, or up to the end if None.
-
- Returns:
- Deferred[PaginationChunk]: List of event IDs that match relations
- requested. The rows are of the form `{"event_id": "..."}`.
- """
-
- where_clause = ["relates_to_id = ?"]
- where_args = [event_id]
-
- if relation_type is not None:
- where_clause.append("relation_type = ?")
- where_args.append(relation_type)
-
- if event_type is not None:
- where_clause.append("type = ?")
- where_args.append(event_type)
-
- if aggregation_key:
- where_clause.append("aggregation_key = ?")
- where_args.append(aggregation_key)
-
- pagination_clause = generate_pagination_where_clause(
- direction=direction,
- column_names=("topological_ordering", "stream_ordering"),
- from_token=attr.astuple(from_token) if from_token else None,
- to_token=attr.astuple(to_token) if to_token else None,
- engine=self.database_engine,
- )
-
- if pagination_clause:
- where_clause.append(pagination_clause)
-
- if direction == "b":
- order = "DESC"
- else:
- order = "ASC"
-
- sql = """
- SELECT event_id, topological_ordering, stream_ordering
- FROM event_relations
- INNER JOIN events USING (event_id)
- WHERE %s
- ORDER BY topological_ordering %s, stream_ordering %s
- LIMIT ?
- """ % (
- " AND ".join(where_clause),
- order,
- order,
- )
-
- def _get_recent_references_for_event_txn(txn):
- txn.execute(sql, where_args + [limit + 1])
-
- last_topo_id = None
- last_stream_id = None
- events = []
- for row in txn:
- events.append({"event_id": row[0]})
- last_topo_id = row[1]
- last_stream_id = row[2]
-
- next_batch = None
- if len(events) > limit and last_topo_id and last_stream_id:
- next_batch = RelationPaginationToken(last_topo_id, last_stream_id)
-
- return PaginationChunk(
- chunk=list(events[:limit]), next_batch=next_batch, prev_batch=from_token
- )
-
- return self.runInteraction(
- "get_recent_references_for_event", _get_recent_references_for_event_txn
- )
-
- @cached(tree=True)
- def get_aggregation_groups_for_event(
- self,
- event_id,
- event_type=None,
- limit=5,
- direction="b",
- from_token=None,
- to_token=None,
- ):
- """Get a list of annotations on the event, grouped by event type and
- aggregation key, sorted by count.
-
- This is used e.g. to get the what and how many reactions have happend
- on an event.
-
- Args:
- event_id (str): Fetch events that relate to this event ID.
- event_type (str|None): Only fetch events with this event type, if
- given.
- limit (int): Only fetch the `limit` groups.
- direction (str): Whether to fetch the highest count first (`"b"`) or
- the lowest count first (`"f"`).
- from_token (AggregationPaginationToken|None): Fetch rows from the
- given token, or from the start if None.
- to_token (AggregationPaginationToken|None): Fetch rows up to the
- given token, or up to the end if None.
-
-
- Returns:
- Deferred[PaginationChunk]: List of groups of annotations that
- match. Each row is a dict with `type`, `key` and `count` fields.
- """
-
- where_clause = ["relates_to_id = ?", "relation_type = ?"]
- where_args = [event_id, RelationTypes.ANNOTATION]
-
- if event_type:
- where_clause.append("type = ?")
- where_args.append(event_type)
-
- having_clause = generate_pagination_where_clause(
- direction=direction,
- column_names=("COUNT(*)", "MAX(stream_ordering)"),
- from_token=attr.astuple(from_token) if from_token else None,
- to_token=attr.astuple(to_token) if to_token else None,
- engine=self.database_engine,
- )
-
- if direction == "b":
- order = "DESC"
- else:
- order = "ASC"
-
- if having_clause:
- having_clause = "HAVING " + having_clause
- else:
- having_clause = ""
-
- sql = """
- SELECT type, aggregation_key, COUNT(DISTINCT sender), MAX(stream_ordering)
- FROM event_relations
- INNER JOIN events USING (event_id)
- WHERE {where_clause}
- GROUP BY relation_type, type, aggregation_key
- {having_clause}
- ORDER BY COUNT(*) {order}, MAX(stream_ordering) {order}
- LIMIT ?
- """.format(
- where_clause=" AND ".join(where_clause),
- order=order,
- having_clause=having_clause,
- )
-
- def _get_aggregation_groups_for_event_txn(txn):
- txn.execute(sql, where_args + [limit + 1])
-
- next_batch = None
- events = []
- for row in txn:
- events.append({"type": row[0], "key": row[1], "count": row[2]})
- next_batch = AggregationPaginationToken(row[2], row[3])
-
- if len(events) <= limit:
- next_batch = None
-
- return PaginationChunk(
- chunk=list(events[:limit]), next_batch=next_batch, prev_batch=from_token
- )
-
- return self.runInteraction(
- "get_aggregation_groups_for_event", _get_aggregation_groups_for_event_txn
- )
-
- @cachedInlineCallbacks()
- def get_applicable_edit(self, event_id):
- """Get the most recent edit (if any) that has happened for the given
- event.
-
- Correctly handles checking whether edits were allowed to happen.
-
- Args:
- event_id (str): The original event ID
-
- Returns:
- Deferred[EventBase|None]: Returns the most recent edit, if any.
- """
-
- # We only allow edits for `m.room.message` events that have the same sender
- # and event type. We can't assert these things during regular event auth so
- # we have to do the checks post hoc.
-
- # Fetches latest edit that has the same type and sender as the
- # original, and is an `m.room.message`.
- sql = """
- SELECT edit.event_id FROM events AS edit
- INNER JOIN event_relations USING (event_id)
- INNER JOIN events AS original ON
- original.event_id = relates_to_id
- AND edit.type = original.type
- AND edit.sender = original.sender
- WHERE
- relates_to_id = ?
- AND relation_type = ?
- AND edit.type = 'm.room.message'
- ORDER by edit.origin_server_ts DESC, edit.event_id DESC
- LIMIT 1
- """
-
- def _get_applicable_edit_txn(txn):
- txn.execute(sql, (event_id, RelationTypes.REPLACE))
- row = txn.fetchone()
- if row:
- return row[0]
-
- edit_id = yield self.runInteraction(
- "get_applicable_edit", _get_applicable_edit_txn
- )
-
- if not edit_id:
- return
-
- edit_event = yield self.get_event(edit_id, allow_none=True)
- return edit_event
-
- def has_user_annotated_event(self, parent_id, event_type, aggregation_key, sender):
- """Check if a user has already annotated an event with the same key
- (e.g. already liked an event).
-
- Args:
- parent_id (str): The event being annotated
- event_type (str): The event type of the annotation
- aggregation_key (str): The aggregation key of the annotation
- sender (str): The sender of the annotation
-
- Returns:
- Deferred[bool]
- """
-
- sql = """
- SELECT 1 FROM event_relations
- INNER JOIN events USING (event_id)
- WHERE
- relates_to_id = ?
- AND relation_type = ?
- AND type = ?
- AND sender = ?
- AND aggregation_key = ?
- LIMIT 1;
- """
-
- def _get_if_user_has_annotated_event(txn):
- txn.execute(
- sql,
- (
- parent_id,
- RelationTypes.ANNOTATION,
- event_type,
- sender,
- aggregation_key,
- ),
- )
-
- return bool(txn.fetchone())
-
- return self.runInteraction(
- "get_if_user_has_annotated_event", _get_if_user_has_annotated_event
- )
-
-
-class RelationsStore(RelationsWorkerStore):
- def _handle_event_relations(self, txn, event):
- """Handles inserting relation data during peristence of events
-
- Args:
- txn
- event (EventBase)
- """
- relation = event.content.get("m.relates_to")
- if not relation:
- # No relations
- return
-
- rel_type = relation.get("rel_type")
- if rel_type not in (
- RelationTypes.ANNOTATION,
- RelationTypes.REFERENCE,
- RelationTypes.REPLACE,
- ):
- # Unknown relation type
- return
-
- parent_id = relation.get("event_id")
- if not parent_id:
- # Invalid relation
- return
-
- aggregation_key = relation.get("key")
-
- self._simple_insert_txn(
- txn,
- table="event_relations",
- values={
- "event_id": event.event_id,
- "relates_to_id": parent_id,
- "relation_type": rel_type,
- "aggregation_key": aggregation_key,
- },
- )
-
- txn.call_after(self.get_relations_for_event.invalidate_many, (parent_id,))
- txn.call_after(
- self.get_aggregation_groups_for_event.invalidate_many, (parent_id,)
- )
-
- if rel_type == RelationTypes.REPLACE:
- txn.call_after(self.get_applicable_edit.invalidate, (parent_id,))
-
- def _handle_redaction(self, txn, redacted_event_id):
- """Handles receiving a redaction and checking whether we need to remove
- any redacted relations from the database.
-
- Args:
- txn
- redacted_event_id (str): The event that was redacted.
- """
-
- self._simple_delete_txn(
- txn, table="event_relations", keyvalues={"event_id": redacted_event_id}
- )
diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index ff63487823..8c4a83a840 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -17,26 +17,6 @@
import logging
from collections import namedtuple
-from six import iteritems, itervalues
-
-from canonicaljson import json
-
-from twisted.internet import defer
-
-from synapse.api.constants import EventTypes, Membership
-from synapse.metrics import LaterGauge
-from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.storage._base import LoggingTransaction, make_in_list_sql_clause
-from synapse.storage.background_updates import BackgroundUpdateStore
-from synapse.storage.engines import Sqlite3Engine
-from synapse.storage.events_worker import EventsWorkerStore
-from synapse.types import get_domain_from_id
-from synapse.util.async_helpers import Linearizer
-from synapse.util.caches import intern_string
-from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList
-from synapse.util.metrics import Measure
-from synapse.util.stringutils import to_ascii
-
logger = logging.getLogger(__name__)
@@ -57,1102 +37,3 @@ ProfileInfo = namedtuple("ProfileInfo", ("avatar_url", "display_name"))
# a given membership type, suitable for use in calculating heroes for a room.
# "count" points to the total numberr of users of a given membership type.
MemberSummary = namedtuple("MemberSummary", ("members", "count"))
-
-_MEMBERSHIP_PROFILE_UPDATE_NAME = "room_membership_profile_update"
-_CURRENT_STATE_MEMBERSHIP_UPDATE_NAME = "current_state_events_membership"
-
-
-class RoomMemberWorkerStore(EventsWorkerStore):
- def __init__(self, db_conn, hs):
- super(RoomMemberWorkerStore, self).__init__(db_conn, hs)
-
- # Is the current_state_events.membership up to date? Or is the
- # background update still running?
- self._current_state_events_membership_up_to_date = False
-
- txn = LoggingTransaction(
- db_conn.cursor(),
- name="_check_safe_current_state_events_membership_updated",
- database_engine=self.database_engine,
- )
- self._check_safe_current_state_events_membership_updated_txn(txn)
- txn.close()
-
- if self.hs.config.metrics_flags.known_servers:
- self._known_servers_count = 1
- self.hs.get_clock().looping_call(
- run_as_background_process,
- 60 * 1000,
- "_count_known_servers",
- self._count_known_servers,
- )
- self.hs.get_clock().call_later(
- 1000,
- run_as_background_process,
- "_count_known_servers",
- self._count_known_servers,
- )
- LaterGauge(
- "synapse_federation_known_servers",
- "",
- [],
- lambda: self._known_servers_count,
- )
-
- @defer.inlineCallbacks
- def _count_known_servers(self):
- """
- Count the servers that this server knows about.
-
- The statistic is stored on the class for the
- `synapse_federation_known_servers` LaterGauge to collect.
- """
-
- def _transact(txn):
- if isinstance(self.database_engine, Sqlite3Engine):
- query = """
- SELECT COUNT(DISTINCT substr(out.user_id, pos+1))
- FROM (
- SELECT rm.user_id as user_id, instr(rm.user_id, ':')
- AS pos FROM room_memberships as rm
- INNER JOIN current_state_events as c ON rm.event_id = c.event_id
- WHERE c.type = 'm.room.member'
- ) as out
- """
- else:
- query = """
- SELECT COUNT(DISTINCT split_part(state_key, ':', 2))
- FROM current_state_events
- WHERE type = 'm.room.member' AND membership = 'join';
- """
- txn.execute(query)
- return list(txn)[0][0]
-
- count = yield self.runInteraction("get_known_servers", _transact)
-
- # We always know about ourselves, even if we have nothing in
- # room_memberships (for example, the server is new).
- self._known_servers_count = max([count, 1])
- return self._known_servers_count
-
- def _check_safe_current_state_events_membership_updated_txn(self, txn):
- """Checks if it is safe to assume the new current_state_events
- membership column is up to date
- """
-
- pending_update = self._simple_select_one_txn(
- txn,
- table="background_updates",
- keyvalues={"update_name": _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME},
- retcols=["update_name"],
- allow_none=True,
- )
-
- self._current_state_events_membership_up_to_date = not pending_update
-
- # If the update is still running, reschedule to run.
- if pending_update:
- self._clock.call_later(
- 15.0,
- run_as_background_process,
- "_check_safe_current_state_events_membership_updated",
- self.runInteraction,
- "_check_safe_current_state_events_membership_updated",
- self._check_safe_current_state_events_membership_updated_txn,
- )
-
- @cachedInlineCallbacks(max_entries=100000, iterable=True, cache_context=True)
- def get_hosts_in_room(self, room_id, cache_context):
- """Returns the set of all hosts currently in the room
- """
- user_ids = yield self.get_users_in_room(
- room_id, on_invalidate=cache_context.invalidate
- )
- hosts = frozenset(get_domain_from_id(user_id) for user_id in user_ids)
- return hosts
-
- @cached(max_entries=100000, iterable=True)
- def get_users_in_room(self, room_id):
- return self.runInteraction(
- "get_users_in_room", self.get_users_in_room_txn, room_id
- )
-
- def get_users_in_room_txn(self, txn, room_id):
- # If we can assume current_state_events.membership is up to date
- # then we can avoid a join, which is a Very Good Thing given how
- # frequently this function gets called.
- if self._current_state_events_membership_up_to_date:
- sql = """
- SELECT state_key FROM current_state_events
- WHERE type = 'm.room.member' AND room_id = ? AND membership = ?
- """
- else:
- sql = """
- SELECT state_key FROM room_memberships as m
- INNER JOIN current_state_events as c
- ON m.event_id = c.event_id
- AND m.room_id = c.room_id
- AND m.user_id = c.state_key
- WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ?
- """
-
- txn.execute(sql, (room_id, Membership.JOIN))
- return [to_ascii(r[0]) for r in txn]
-
- @cached(max_entries=100000)
- def get_room_summary(self, room_id):
- """ Get the details of a room roughly suitable for use by the room
- summary extension to /sync. Useful when lazy loading room members.
- Args:
- room_id (str): The room ID to query
- Returns:
- Deferred[dict[str, MemberSummary]:
- dict of membership states, pointing to a MemberSummary named tuple.
- """
-
- def _get_room_summary_txn(txn):
- # first get counts.
- # We do this all in one transaction to keep the cache small.
- # FIXME: get rid of this when we have room_stats
-
- # If we can assume current_state_events.membership is up to date
- # then we can avoid a join, which is a Very Good Thing given how
- # frequently this function gets called.
- if self._current_state_events_membership_up_to_date:
- # Note, rejected events will have a null membership field, so
- # we we manually filter them out.
- sql = """
- SELECT count(*), membership FROM current_state_events
- WHERE type = 'm.room.member' AND room_id = ?
- AND membership IS NOT NULL
- GROUP BY membership
- """
- else:
- sql = """
- SELECT count(*), m.membership FROM room_memberships as m
- INNER JOIN current_state_events as c
- ON m.event_id = c.event_id
- AND m.room_id = c.room_id
- AND m.user_id = c.state_key
- WHERE c.type = 'm.room.member' AND c.room_id = ?
- GROUP BY m.membership
- """
-
- txn.execute(sql, (room_id,))
- res = {}
- for count, membership in txn:
- summary = res.setdefault(to_ascii(membership), MemberSummary([], count))
-
- # we order by membership and then fairly arbitrarily by event_id so
- # heroes are consistent
- if self._current_state_events_membership_up_to_date:
- # Note, rejected events will have a null membership field, so
- # we we manually filter them out.
- sql = """
- SELECT state_key, membership, event_id
- FROM current_state_events
- WHERE type = 'm.room.member' AND room_id = ?
- AND membership IS NOT NULL
- ORDER BY
- CASE membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
- event_id ASC
- LIMIT ?
- """
- else:
- sql = """
- SELECT c.state_key, m.membership, c.event_id
- FROM room_memberships as m
- INNER JOIN current_state_events as c USING (room_id, event_id)
- WHERE c.type = 'm.room.member' AND c.room_id = ?
- ORDER BY
- CASE m.membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
- c.event_id ASC
- LIMIT ?
- """
-
- # 6 is 5 (number of heroes) plus 1, in case one of them is the calling user.
- txn.execute(sql, (room_id, Membership.JOIN, Membership.INVITE, 6))
- for user_id, membership, event_id in txn:
- summary = res[to_ascii(membership)]
- # we will always have a summary for this membership type at this
- # point given the summary currently contains the counts.
- members = summary.members
- members.append((to_ascii(user_id), to_ascii(event_id)))
-
- return res
-
- return self.runInteraction("get_room_summary", _get_room_summary_txn)
-
- def _get_user_counts_in_room_txn(self, txn, room_id):
- """
- Get the user count in a room by membership.
-
- Args:
- room_id (str)
- membership (Membership)
-
- Returns:
- Deferred[int]
- """
- sql = """
- SELECT m.membership, count(*) FROM room_memberships as m
- INNER JOIN current_state_events as c USING(event_id)
- WHERE c.type = 'm.room.member' AND c.room_id = ?
- GROUP BY m.membership
- """
-
- txn.execute(sql, (room_id,))
- return {row[0]: row[1] for row in txn}
-
- @cached()
- def get_invited_rooms_for_user(self, user_id):
- """ Get all the rooms the user is invited to
- Args:
- user_id (str): The user ID.
- Returns:
- A deferred list of RoomsForUser.
- """
-
- return self.get_rooms_for_user_where_membership_is(user_id, [Membership.INVITE])
-
- @defer.inlineCallbacks
- def get_invite_for_user_in_room(self, user_id, room_id):
- """Gets the invite for the given user and room
-
- Args:
- user_id (str)
- room_id (str)
-
- Returns:
- Deferred: Resolves to either a RoomsForUser or None if no invite was
- found.
- """
- invites = yield self.get_invited_rooms_for_user(user_id)
- for invite in invites:
- if invite.room_id == room_id:
- return invite
- return None
-
- @defer.inlineCallbacks
- def get_rooms_for_user_where_membership_is(self, user_id, membership_list):
- """ Get all the rooms for this user where the membership for this user
- matches one in the membership list.
-
- Filters out forgotten rooms.
-
- Args:
- user_id (str): The user ID.
- membership_list (list): A list of synapse.api.constants.Membership
- values which the user must be in.
-
- Returns:
- Deferred[list[RoomsForUser]]
- """
- if not membership_list:
- return defer.succeed(None)
-
- rooms = yield self.runInteraction(
- "get_rooms_for_user_where_membership_is",
- self._get_rooms_for_user_where_membership_is_txn,
- user_id,
- membership_list,
- )
-
- # Now we filter out forgotten rooms
- forgotten_rooms = yield self.get_forgotten_rooms_for_user(user_id)
- return [room for room in rooms if room.room_id not in forgotten_rooms]
-
- def _get_rooms_for_user_where_membership_is_txn(
- self, txn, user_id, membership_list
- ):
-
- do_invite = Membership.INVITE in membership_list
- membership_list = [m for m in membership_list if m != Membership.INVITE]
-
- results = []
- if membership_list:
- if self._current_state_events_membership_up_to_date:
- clause, args = make_in_list_sql_clause(
- self.database_engine, "c.membership", membership_list
- )
- sql = """
- SELECT room_id, e.sender, c.membership, event_id, e.stream_ordering
- FROM current_state_events AS c
- INNER JOIN events AS e USING (room_id, event_id)
- WHERE
- c.type = 'm.room.member'
- AND state_key = ?
- AND %s
- """ % (
- clause,
- )
- else:
- clause, args = make_in_list_sql_clause(
- self.database_engine, "m.membership", membership_list
- )
- sql = """
- SELECT room_id, e.sender, m.membership, event_id, e.stream_ordering
- FROM current_state_events AS c
- INNER JOIN room_memberships AS m USING (room_id, event_id)
- INNER JOIN events AS e USING (room_id, event_id)
- WHERE
- c.type = 'm.room.member'
- AND state_key = ?
- AND %s
- """ % (
- clause,
- )
-
- txn.execute(sql, (user_id, *args))
- results = [RoomsForUser(**r) for r in self.cursor_to_dict(txn)]
-
- if do_invite:
- sql = (
- "SELECT i.room_id, inviter, i.event_id, e.stream_ordering"
- " FROM local_invites as i"
- " INNER JOIN events as e USING (event_id)"
- " WHERE invitee = ? AND locally_rejected is NULL"
- " AND replaced_by is NULL"
- )
-
- txn.execute(sql, (user_id,))
- results.extend(
- RoomsForUser(
- room_id=r["room_id"],
- sender=r["inviter"],
- event_id=r["event_id"],
- stream_ordering=r["stream_ordering"],
- membership=Membership.INVITE,
- )
- for r in self.cursor_to_dict(txn)
- )
-
- return results
-
- @cachedInlineCallbacks(max_entries=500000, iterable=True)
- def get_rooms_for_user_with_stream_ordering(self, user_id):
- """Returns a set of room_ids the user is currently joined to
-
- Args:
- user_id (str)
-
- Returns:
- Deferred[frozenset[GetRoomsForUserWithStreamOrdering]]: Returns
- the rooms the user is in currently, along with the stream ordering
- of the most recent join for that user and room.
- """
- rooms = yield self.get_rooms_for_user_where_membership_is(
- user_id, membership_list=[Membership.JOIN]
- )
- return frozenset(
- GetRoomsForUserWithStreamOrdering(r.room_id, r.stream_ordering)
- for r in rooms
- )
-
- @defer.inlineCallbacks
- def get_rooms_for_user(self, user_id, on_invalidate=None):
- """Returns a set of room_ids the user is currently joined to
- """
- rooms = yield self.get_rooms_for_user_with_stream_ordering(
- user_id, on_invalidate=on_invalidate
- )
- return frozenset(r.room_id for r in rooms)
-
- @cachedInlineCallbacks(max_entries=500000, cache_context=True, iterable=True)
- def get_users_who_share_room_with_user(self, user_id, cache_context):
- """Returns the set of users who share a room with `user_id`
- """
- room_ids = yield self.get_rooms_for_user(
- user_id, on_invalidate=cache_context.invalidate
- )
-
- user_who_share_room = set()
- for room_id in room_ids:
- user_ids = yield self.get_users_in_room(
- room_id, on_invalidate=cache_context.invalidate
- )
- user_who_share_room.update(user_ids)
-
- return user_who_share_room
-
- @defer.inlineCallbacks
- def get_joined_users_from_context(self, event, context):
- state_group = context.state_group
- if not state_group:
- # If state_group is None it means it has yet to be assigned a
- # state group, i.e. we need to make sure that calls with a state_group
- # of None don't hit previous cached calls with a None state_group.
- # To do this we set the state_group to a new object as object() != object()
- state_group = object()
-
- current_state_ids = yield context.get_current_state_ids(self)
- result = yield self._get_joined_users_from_context(
- event.room_id, state_group, current_state_ids, event=event, context=context
- )
- return result
-
- @defer.inlineCallbacks
- def get_joined_users_from_state(self, room_id, state_entry):
- state_group = state_entry.state_group
- if not state_group:
- # If state_group is None it means it has yet to be assigned a
- # state group, i.e. we need to make sure that calls with a state_group
- # of None don't hit previous cached calls with a None state_group.
- # To do this we set the state_group to a new object as object() != object()
- state_group = object()
-
- with Measure(self._clock, "get_joined_users_from_state"):
- return (
- yield self._get_joined_users_from_context(
- room_id, state_group, state_entry.state, context=state_entry
- )
- )
-
- @cachedInlineCallbacks(
- num_args=2, cache_context=True, iterable=True, max_entries=100000
- )
- def _get_joined_users_from_context(
- self,
- room_id,
- state_group,
- current_state_ids,
- cache_context,
- event=None,
- context=None,
- ):
- # We don't use `state_group`, it's there so that we can cache based
- # on it. However, it's important that it's never None, since two current_states
- # with a state_group of None are likely to be different.
- # See bulk_get_push_rules_for_room for how we work around this.
- assert state_group is not None
-
- users_in_room = {}
- member_event_ids = [
- e_id
- for key, e_id in iteritems(current_state_ids)
- if key[0] == EventTypes.Member
- ]
-
- if context is not None:
- # If we have a context with a delta from a previous state group,
- # check if we also have the result from the previous group in cache.
- # If we do then we can reuse that result and simply update it with
- # any membership changes in `delta_ids`
- if context.prev_group and context.delta_ids:
- prev_res = self._get_joined_users_from_context.cache.get(
- (room_id, context.prev_group), None
- )
- if prev_res and isinstance(prev_res, dict):
- users_in_room = dict(prev_res)
- member_event_ids = [
- e_id
- for key, e_id in iteritems(context.delta_ids)
- if key[0] == EventTypes.Member
- ]
- for etype, state_key in context.delta_ids:
- users_in_room.pop(state_key, None)
-
- # We check if we have any of the member event ids in the event cache
- # before we ask the DB
-
- # We don't update the event cache hit ratio as it completely throws off
- # the hit ratio counts. After all, we don't populate the cache if we
- # miss it here
- event_map = self._get_events_from_cache(
- member_event_ids, allow_rejected=False, update_metrics=False
- )
-
- missing_member_event_ids = []
- for event_id in member_event_ids:
- ev_entry = event_map.get(event_id)
- if ev_entry:
- if ev_entry.event.membership == Membership.JOIN:
- users_in_room[to_ascii(ev_entry.event.state_key)] = ProfileInfo(
- display_name=to_ascii(
- ev_entry.event.content.get("displayname", None)
- ),
- avatar_url=to_ascii(
- ev_entry.event.content.get("avatar_url", None)
- ),
- )
- else:
- missing_member_event_ids.append(event_id)
-
- if missing_member_event_ids:
- event_to_memberships = yield self._get_joined_profiles_from_event_ids(
- missing_member_event_ids
- )
- users_in_room.update((row for row in event_to_memberships.values() if row))
-
- if event is not None and event.type == EventTypes.Member:
- if event.membership == Membership.JOIN:
- if event.event_id in member_event_ids:
- users_in_room[to_ascii(event.state_key)] = ProfileInfo(
- display_name=to_ascii(event.content.get("displayname", None)),
- avatar_url=to_ascii(event.content.get("avatar_url", None)),
- )
-
- return users_in_room
-
- @cached(max_entries=10000)
- def _get_joined_profile_from_event_id(self, event_id):
- raise NotImplementedError()
-
- @cachedList(
- cached_method_name="_get_joined_profile_from_event_id",
- list_name="event_ids",
- inlineCallbacks=True,
- )
- def _get_joined_profiles_from_event_ids(self, event_ids):
- """For given set of member event_ids check if they point to a join
- event and if so return the associated user and profile info.
-
- Args:
- event_ids (Iterable[str]): The member event IDs to lookup
-
- Returns:
- Deferred[dict[str, Tuple[str, ProfileInfo]|None]]: Map from event ID
- to `user_id` and ProfileInfo (or None if not join event).
- """
-
- rows = yield self._simple_select_many_batch(
- table="room_memberships",
- column="event_id",
- iterable=event_ids,
- retcols=("user_id", "display_name", "avatar_url", "event_id"),
- keyvalues={"membership": Membership.JOIN},
- batch_size=500,
- desc="_get_membership_from_event_ids",
- )
-
- return {
- row["event_id"]: (
- row["user_id"],
- ProfileInfo(
- avatar_url=row["avatar_url"], display_name=row["display_name"]
- ),
- )
- for row in rows
- }
-
- @cachedInlineCallbacks(max_entries=10000)
- def is_host_joined(self, room_id, host):
- if "%" in host or "_" in host:
- raise Exception("Invalid host name")
-
- sql = """
- SELECT state_key FROM current_state_events AS c
- INNER JOIN room_memberships AS m USING (event_id)
- WHERE m.membership = 'join'
- AND type = 'm.room.member'
- AND c.room_id = ?
- AND state_key LIKE ?
- LIMIT 1
- """
-
- # We do need to be careful to ensure that host doesn't have any wild cards
- # in it, but we checked above for known ones and we'll check below that
- # the returned user actually has the correct domain.
- like_clause = "%:" + host
-
- rows = yield self._execute("is_host_joined", None, sql, room_id, like_clause)
-
- if not rows:
- return False
-
- user_id = rows[0][0]
- if get_domain_from_id(user_id) != host:
- # This can only happen if the host name has something funky in it
- raise Exception("Invalid host name")
-
- return True
-
- @cachedInlineCallbacks()
- def was_host_joined(self, room_id, host):
- """Check whether the server is or ever was in the room.
-
- Args:
- room_id (str)
- host (str)
-
- Returns:
- Deferred: Resolves to True if the host is/was in the room, otherwise
- False.
- """
- if "%" in host or "_" in host:
- raise Exception("Invalid host name")
-
- sql = """
- SELECT user_id FROM room_memberships
- WHERE room_id = ?
- AND user_id LIKE ?
- AND membership = 'join'
- LIMIT 1
- """
-
- # We do need to be careful to ensure that host doesn't have any wild cards
- # in it, but we checked above for known ones and we'll check below that
- # the returned user actually has the correct domain.
- like_clause = "%:" + host
-
- rows = yield self._execute("was_host_joined", None, sql, room_id, like_clause)
-
- if not rows:
- return False
-
- user_id = rows[0][0]
- if get_domain_from_id(user_id) != host:
- # This can only happen if the host name has something funky in it
- raise Exception("Invalid host name")
-
- return True
-
- @defer.inlineCallbacks
- def get_joined_hosts(self, room_id, state_entry):
- state_group = state_entry.state_group
- if not state_group:
- # If state_group is None it means it has yet to be assigned a
- # state group, i.e. we need to make sure that calls with a state_group
- # of None don't hit previous cached calls with a None state_group.
- # To do this we set the state_group to a new object as object() != object()
- state_group = object()
-
- with Measure(self._clock, "get_joined_hosts"):
- return (
- yield self._get_joined_hosts(
- room_id, state_group, state_entry.state, state_entry=state_entry
- )
- )
-
- @cachedInlineCallbacks(num_args=2, max_entries=10000, iterable=True)
- # @defer.inlineCallbacks
- def _get_joined_hosts(self, room_id, state_group, current_state_ids, state_entry):
- # We don't use `state_group`, its there so that we can cache based
- # on it. However, its important that its never None, since two current_state's
- # with a state_group of None are likely to be different.
- # See bulk_get_push_rules_for_room for how we work around this.
- assert state_group is not None
-
- cache = self._get_joined_hosts_cache(room_id)
- joined_hosts = yield cache.get_destinations(state_entry)
-
- return joined_hosts
-
- @cached(max_entries=10000)
- def _get_joined_hosts_cache(self, room_id):
- return _JoinedHostsCache(self, room_id)
-
- @cachedInlineCallbacks(num_args=2)
- def did_forget(self, user_id, room_id):
- """Returns whether user_id has elected to discard history for room_id.
-
- Returns False if they have since re-joined."""
-
- def f(txn):
- sql = (
- "SELECT"
- " COUNT(*)"
- " FROM"
- " room_memberships"
- " WHERE"
- " user_id = ?"
- " AND"
- " room_id = ?"
- " AND"
- " forgotten = 0"
- )
- txn.execute(sql, (user_id, room_id))
- rows = txn.fetchall()
- return rows[0][0]
-
- count = yield self.runInteraction("did_forget_membership", f)
- return count == 0
-
- @cached()
- def get_forgotten_rooms_for_user(self, user_id):
- """Gets all rooms the user has forgotten.
-
- Args:
- user_id (str)
-
- Returns:
- Deferred[set[str]]
- """
-
- def _get_forgotten_rooms_for_user_txn(txn):
- # This is a slightly convoluted query that first looks up all rooms
- # that the user has forgotten in the past, then rechecks that list
- # to see if any have subsequently been updated. This is done so that
- # we can use a partial index on `forgotten = 1` on the assumption
- # that few users will actually forget many rooms.
- #
- # Note that a room is considered "forgotten" if *all* membership
- # events for that user and room have the forgotten field set (as
- # when a user forgets a room we update all rows for that user and
- # room, not just the current one).
- sql = """
- SELECT room_id, (
- SELECT count(*) FROM room_memberships
- WHERE room_id = m.room_id AND user_id = m.user_id AND forgotten = 0
- ) AS count
- FROM room_memberships AS m
- WHERE user_id = ? AND forgotten = 1
- GROUP BY room_id, user_id;
- """
- txn.execute(sql, (user_id,))
- return set(row[0] for row in txn if row[1] == 0)
-
- return self.runInteraction(
- "get_forgotten_rooms_for_user", _get_forgotten_rooms_for_user_txn
- )
-
- @defer.inlineCallbacks
- def get_rooms_user_has_been_in(self, user_id):
- """Get all rooms that the user has ever been in.
-
- Args:
- user_id (str)
-
- Returns:
- Deferred[set[str]]: Set of room IDs.
- """
-
- room_ids = yield self._simple_select_onecol(
- table="room_memberships",
- keyvalues={"membership": Membership.JOIN, "user_id": user_id},
- retcol="room_id",
- desc="get_rooms_user_has_been_in",
- )
-
- return set(room_ids)
-
-
-class RoomMemberBackgroundUpdateStore(BackgroundUpdateStore):
- def __init__(self, db_conn, hs):
- super(RoomMemberBackgroundUpdateStore, self).__init__(db_conn, hs)
- self.register_background_update_handler(
- _MEMBERSHIP_PROFILE_UPDATE_NAME, self._background_add_membership_profile
- )
- self.register_background_update_handler(
- _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME,
- self._background_current_state_membership,
- )
- self.register_background_index_update(
- "room_membership_forgotten_idx",
- index_name="room_memberships_user_room_forgotten",
- table="room_memberships",
- columns=["user_id", "room_id"],
- where_clause="forgotten = 1",
- )
-
- @defer.inlineCallbacks
- def _background_add_membership_profile(self, progress, batch_size):
- target_min_stream_id = progress.get(
- "target_min_stream_id_inclusive", self._min_stream_order_on_start
- )
- max_stream_id = progress.get(
- "max_stream_id_exclusive", self._stream_order_on_start + 1
- )
-
- INSERT_CLUMP_SIZE = 1000
-
- def add_membership_profile_txn(txn):
- sql = """
- SELECT stream_ordering, event_id, events.room_id, event_json.json
- FROM events
- INNER JOIN event_json USING (event_id)
- INNER JOIN room_memberships USING (event_id)
- WHERE ? <= stream_ordering AND stream_ordering < ?
- AND type = 'm.room.member'
- ORDER BY stream_ordering DESC
- LIMIT ?
- """
-
- txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size))
-
- rows = self.cursor_to_dict(txn)
- if not rows:
- return 0
-
- min_stream_id = rows[-1]["stream_ordering"]
-
- to_update = []
- for row in rows:
- event_id = row["event_id"]
- room_id = row["room_id"]
- try:
- event_json = json.loads(row["json"])
- content = event_json["content"]
- except Exception:
- continue
-
- display_name = content.get("displayname", None)
- avatar_url = content.get("avatar_url", None)
-
- if display_name or avatar_url:
- to_update.append((display_name, avatar_url, event_id, room_id))
-
- to_update_sql = """
- UPDATE room_memberships SET display_name = ?, avatar_url = ?
- WHERE event_id = ? AND room_id = ?
- """
- for index in range(0, len(to_update), INSERT_CLUMP_SIZE):
- clump = to_update[index : index + INSERT_CLUMP_SIZE]
- txn.executemany(to_update_sql, clump)
-
- progress = {
- "target_min_stream_id_inclusive": target_min_stream_id,
- "max_stream_id_exclusive": min_stream_id,
- }
-
- self._background_update_progress_txn(
- txn, _MEMBERSHIP_PROFILE_UPDATE_NAME, progress
- )
-
- return len(rows)
-
- result = yield self.runInteraction(
- _MEMBERSHIP_PROFILE_UPDATE_NAME, add_membership_profile_txn
- )
-
- if not result:
- yield self._end_background_update(_MEMBERSHIP_PROFILE_UPDATE_NAME)
-
- return result
-
- @defer.inlineCallbacks
- def _background_current_state_membership(self, progress, batch_size):
- """Update the new membership column on current_state_events.
-
- This works by iterating over all rooms in alphebetical order.
- """
-
- def _background_current_state_membership_txn(txn, last_processed_room):
- processed = 0
- while processed < batch_size:
- txn.execute(
- """
- SELECT MIN(room_id) FROM current_state_events WHERE room_id > ?
- """,
- (last_processed_room,),
- )
- row = txn.fetchone()
- if not row or not row[0]:
- return processed, True
-
- next_room, = row
-
- sql = """
- UPDATE current_state_events
- SET membership = (
- SELECT membership FROM room_memberships
- WHERE event_id = current_state_events.event_id
- )
- WHERE room_id = ?
- """
- txn.execute(sql, (next_room,))
- processed += txn.rowcount
-
- last_processed_room = next_room
-
- self._background_update_progress_txn(
- txn,
- _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME,
- {"last_processed_room": last_processed_room},
- )
-
- return processed, False
-
- # If we haven't got a last processed room then just use the empty
- # string, which will compare before all room IDs correctly.
- last_processed_room = progress.get("last_processed_room", "")
-
- row_count, finished = yield self.runInteraction(
- "_background_current_state_membership_update",
- _background_current_state_membership_txn,
- last_processed_room,
- )
-
- if finished:
- yield self._end_background_update(_CURRENT_STATE_MEMBERSHIP_UPDATE_NAME)
-
- return row_count
-
-
-class RoomMemberStore(RoomMemberWorkerStore, RoomMemberBackgroundUpdateStore):
- def __init__(self, db_conn, hs):
- super(RoomMemberStore, self).__init__(db_conn, hs)
-
- def _store_room_members_txn(self, txn, events, backfilled):
- """Store a room member in the database.
- """
- self._simple_insert_many_txn(
- txn,
- table="room_memberships",
- values=[
- {
- "event_id": event.event_id,
- "user_id": event.state_key,
- "sender": event.user_id,
- "room_id": event.room_id,
- "membership": event.membership,
- "display_name": event.content.get("displayname", None),
- "avatar_url": event.content.get("avatar_url", None),
- }
- for event in events
- ],
- )
-
- for event in events:
- txn.call_after(
- self._membership_stream_cache.entity_has_changed,
- event.state_key,
- event.internal_metadata.stream_ordering,
- )
- txn.call_after(
- self.get_invited_rooms_for_user.invalidate, (event.state_key,)
- )
-
- # We update the local_invites table only if the event is "current",
- # i.e., its something that has just happened. If the event is an
- # outlier it is only current if its an "out of band membership",
- # like a remote invite or a rejection of a remote invite.
- is_new_state = not backfilled and (
- not event.internal_metadata.is_outlier()
- or event.internal_metadata.is_out_of_band_membership()
- )
- is_mine = self.hs.is_mine_id(event.state_key)
- if is_new_state and is_mine:
- if event.membership == Membership.INVITE:
- self._simple_insert_txn(
- txn,
- table="local_invites",
- values={
- "event_id": event.event_id,
- "invitee": event.state_key,
- "inviter": event.sender,
- "room_id": event.room_id,
- "stream_id": event.internal_metadata.stream_ordering,
- },
- )
- else:
- sql = (
- "UPDATE local_invites SET stream_id = ?, replaced_by = ? WHERE"
- " room_id = ? AND invitee = ? AND locally_rejected is NULL"
- " AND replaced_by is NULL"
- )
-
- txn.execute(
- sql,
- (
- event.internal_metadata.stream_ordering,
- event.event_id,
- event.room_id,
- event.state_key,
- ),
- )
-
- @defer.inlineCallbacks
- def locally_reject_invite(self, user_id, room_id):
- sql = (
- "UPDATE local_invites SET stream_id = ?, locally_rejected = ? WHERE"
- " room_id = ? AND invitee = ? AND locally_rejected is NULL"
- " AND replaced_by is NULL"
- )
-
- def f(txn, stream_ordering):
- txn.execute(sql, (stream_ordering, True, room_id, user_id))
-
- with self._stream_id_gen.get_next() as stream_ordering:
- yield self.runInteraction("locally_reject_invite", f, stream_ordering)
-
- def forget(self, user_id, room_id):
- """Indicate that user_id wishes to discard history for room_id."""
-
- def f(txn):
- sql = (
- "UPDATE"
- " room_memberships"
- " SET"
- " forgotten = 1"
- " WHERE"
- " user_id = ?"
- " AND"
- " room_id = ?"
- )
- txn.execute(sql, (user_id, room_id))
-
- self._invalidate_cache_and_stream(txn, self.did_forget, (user_id, room_id))
- self._invalidate_cache_and_stream(
- txn, self.get_forgotten_rooms_for_user, (user_id,)
- )
-
- return self.runInteraction("forget_membership", f)
-
-
-class _JoinedHostsCache(object):
- """Cache for joined hosts in a room that is optimised to handle updates
- via state deltas.
- """
-
- def __init__(self, store, room_id):
- self.store = store
- self.room_id = room_id
-
- self.hosts_to_joined_users = {}
-
- self.state_group = object()
-
- self.linearizer = Linearizer("_JoinedHostsCache")
-
- self._len = 0
-
- @defer.inlineCallbacks
- def get_destinations(self, state_entry):
- """Get set of destinations for a state entry
-
- Args:
- state_entry(synapse.state._StateCacheEntry)
- """
- if state_entry.state_group == self.state_group:
- return frozenset(self.hosts_to_joined_users)
-
- with (yield self.linearizer.queue(())):
- if state_entry.state_group == self.state_group:
- pass
- elif state_entry.prev_group == self.state_group:
- for (typ, state_key), event_id in iteritems(state_entry.delta_ids):
- if typ != EventTypes.Member:
- continue
-
- host = intern_string(get_domain_from_id(state_key))
- user_id = state_key
- known_joins = self.hosts_to_joined_users.setdefault(host, set())
-
- event = yield self.store.get_event(event_id)
- if event.membership == Membership.JOIN:
- known_joins.add(user_id)
- else:
- known_joins.discard(user_id)
-
- if not known_joins:
- self.hosts_to_joined_users.pop(host, None)
- else:
- joined_users = yield self.store.get_joined_users_from_state(
- self.room_id, state_entry
- )
-
- self.hosts_to_joined_users = {}
- for user_id in joined_users:
- host = intern_string(get_domain_from_id(user_id))
- self.hosts_to_joined_users.setdefault(host, set()).add(user_id)
-
- if state_entry.state_group:
- self.state_group = state_entry.state_group
- else:
- self.state_group = object()
- self._len = sum(len(v) for v in itervalues(self.hosts_to_joined_users))
- return frozenset(self.hosts_to_joined_users)
-
- def __len__(self):
- return self._len
diff --git a/synapse/storage/schema/delta/35/00background_updates_add_col.sql b/synapse/storage/schema/delta/35/00background_updates_add_col.sql
new file mode 100644
index 0000000000..c2d2a4f836
--- /dev/null
+++ b/synapse/storage/schema/delta/35/00background_updates_add_col.sql
@@ -0,0 +1,17 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ALTER TABLE background_updates ADD COLUMN depends_on TEXT;
diff --git a/synapse/storage/schema/full_schemas/54/full.sql b/synapse/storage/schema/full_schemas/54/full.sql
new file mode 100644
index 0000000000..1005880466
--- /dev/null
+++ b/synapse/storage/schema/full_schemas/54/full.sql
@@ -0,0 +1,8 @@
+
+
+CREATE TABLE background_updates (
+ update_name text NOT NULL,
+ progress_json text NOT NULL,
+ depends_on text,
+ CONSTRAINT background_updates_uniqueness UNIQUE (update_name)
+);
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index a941a5ae3f..a2df8fa827 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -14,45 +14,16 @@
# limitations under the License.
import logging
-from collections import namedtuple
from six import iteritems, itervalues
-from six.moves import range
import attr
-from twisted.internet import defer
-
from synapse.api.constants import EventTypes
-from synapse.api.errors import NotFoundError
-from synapse.storage._base import SQLBaseStore
-from synapse.storage.background_updates import BackgroundUpdateStore
-from synapse.storage.engines import PostgresEngine
-from synapse.storage.events_worker import EventsWorkerStore
-from synapse.util.caches import get_cache_factor_for, intern_string
-from synapse.util.caches.descriptors import cached, cachedList
-from synapse.util.caches.dictionary_cache import DictionaryCache
-from synapse.util.stringutils import to_ascii
logger = logging.getLogger(__name__)
-MAX_STATE_DELTA_HOPS = 100
-
-
-class _GetStateGroupDelta(
- namedtuple("_GetStateGroupDelta", ("prev_group", "delta_ids"))
-):
- """Return type of get_state_group_delta that implements __len__, which lets
- us use the itrable flag when caching
- """
-
- __slots__ = []
-
- def __len__(self):
- return len(self.delta_ids) if self.delta_ids else 0
-
-
@attr.s(slots=True)
class StateFilter(object):
"""A filter used when querying for state.
@@ -351,1195 +322,3 @@ class StateFilter(object):
)
return member_filter, non_member_filter
-
-
-class StateGroupBackgroundUpdateStore(SQLBaseStore):
- """Defines functions related to state groups needed to run the state backgroud
- updates.
- """
-
- def _count_state_group_hops_txn(self, txn, state_group):
- """Given a state group, count how many hops there are in the tree.
-
- This is used to ensure the delta chains don't get too long.
- """
- if isinstance(self.database_engine, PostgresEngine):
- sql = """
- WITH RECURSIVE state(state_group) AS (
- VALUES(?::bigint)
- UNION ALL
- SELECT prev_state_group FROM state_group_edges e, state s
- WHERE s.state_group = e.state_group
- )
- SELECT count(*) FROM state;
- """
-
- txn.execute(sql, (state_group,))
- row = txn.fetchone()
- if row and row[0]:
- return row[0]
- else:
- return 0
- else:
- # We don't use WITH RECURSIVE on sqlite3 as there are distributions
- # that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
- next_group = state_group
- count = 0
-
- while next_group:
- next_group = self._simple_select_one_onecol_txn(
- txn,
- table="state_group_edges",
- keyvalues={"state_group": next_group},
- retcol="prev_state_group",
- allow_none=True,
- )
- if next_group:
- count += 1
-
- return count
-
- def _get_state_groups_from_groups_txn(
- self, txn, groups, state_filter=StateFilter.all()
- ):
- results = {group: {} for group in groups}
-
- where_clause, where_args = state_filter.make_sql_filter_clause()
-
- # Unless the filter clause is empty, we're going to append it after an
- # existing where clause
- if where_clause:
- where_clause = " AND (%s)" % (where_clause,)
-
- if isinstance(self.database_engine, PostgresEngine):
- # Temporarily disable sequential scans in this transaction. This is
- # a temporary hack until we can add the right indices in
- txn.execute("SET LOCAL enable_seqscan=off")
-
- # The below query walks the state_group tree so that the "state"
- # table includes all state_groups in the tree. It then joins
- # against `state_groups_state` to fetch the latest state.
- # It assumes that previous state groups are always numerically
- # lesser.
- # The PARTITION is used to get the event_id in the greatest state
- # group for the given type, state_key.
- # This may return multiple rows per (type, state_key), but last_value
- # should be the same.
- sql = """
- WITH RECURSIVE state(state_group) AS (
- VALUES(?::bigint)
- UNION ALL
- SELECT prev_state_group FROM state_group_edges e, state s
- WHERE s.state_group = e.state_group
- )
- SELECT DISTINCT type, state_key, last_value(event_id) OVER (
- PARTITION BY type, state_key ORDER BY state_group ASC
- ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
- ) AS event_id FROM state_groups_state
- WHERE state_group IN (
- SELECT state_group FROM state
- )
- """
-
- for group in groups:
- args = [group]
- args.extend(where_args)
-
- txn.execute(sql + where_clause, args)
- for row in txn:
- typ, state_key, event_id = row
- key = (typ, state_key)
- results[group][key] = event_id
- else:
- max_entries_returned = state_filter.max_entries_returned()
-
- # We don't use WITH RECURSIVE on sqlite3 as there are distributions
- # that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
- for group in groups:
- next_group = group
-
- while next_group:
- # We did this before by getting the list of group ids, and
- # then passing that list to sqlite to get latest event for
- # each (type, state_key). However, that was terribly slow
- # without the right indices (which we can't add until
- # after we finish deduping state, which requires this func)
- args = [next_group]
- args.extend(where_args)
-
- txn.execute(
- "SELECT type, state_key, event_id FROM state_groups_state"
- " WHERE state_group = ? " + where_clause,
- args,
- )
- results[group].update(
- ((typ, state_key), event_id)
- for typ, state_key, event_id in txn
- if (typ, state_key) not in results[group]
- )
-
- # If the number of entries in the (type,state_key)->event_id dict
- # matches the number of (type,state_keys) types we were searching
- # for, then we must have found them all, so no need to go walk
- # further down the tree... UNLESS our types filter contained
- # wildcards (i.e. Nones) in which case we have to do an exhaustive
- # search
- if (
- max_entries_returned is not None
- and len(results[group]) == max_entries_returned
- ):
- break
-
- next_group = self._simple_select_one_onecol_txn(
- txn,
- table="state_group_edges",
- keyvalues={"state_group": next_group},
- retcol="prev_state_group",
- allow_none=True,
- )
-
- return results
-
-
-# this inherits from EventsWorkerStore because it calls self.get_events
-class StateGroupWorkerStore(
- EventsWorkerStore, StateGroupBackgroundUpdateStore, SQLBaseStore
-):
- """The parts of StateGroupStore that can be called from workers.
- """
-
- STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication"
- STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index"
- CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx"
-
- def __init__(self, db_conn, hs):
- super(StateGroupWorkerStore, self).__init__(db_conn, hs)
-
- # Originally the state store used a single DictionaryCache to cache the
- # event IDs for the state types in a given state group to avoid hammering
- # on the state_group* tables.
- #
- # The point of using a DictionaryCache is that it can cache a subset
- # of the state events for a given state group (i.e. a subset of the keys for a
- # given dict which is an entry in the cache for a given state group ID).
- #
- # However, this poses problems when performing complicated queries
- # on the store - for instance: "give me all the state for this group, but
- # limit members to this subset of users", as DictionaryCache's API isn't
- # rich enough to say "please cache any of these fields, apart from this subset".
- # This is problematic when lazy loading members, which requires this behaviour,
- # as without it the cache has no choice but to speculatively load all
- # state events for the group, which negates the efficiency being sought.
- #
- # Rather than overcomplicating DictionaryCache's API, we instead split the
- # state_group_cache into two halves - one for tracking non-member events,
- # and the other for tracking member_events. This means that lazy loading
- # queries can be made in a cache-friendly manner by querying both caches
- # separately and then merging the result. So for the example above, you
- # would query the members cache for a specific subset of state keys
- # (which DictionaryCache will handle efficiently and fine) and the non-members
- # cache for all state (which DictionaryCache will similarly handle fine)
- # and then just merge the results together.
- #
- # We size the non-members cache to be smaller than the members cache as the
- # vast majority of state in Matrix (today) is member events.
-
- self._state_group_cache = DictionaryCache(
- "*stateGroupCache*",
- # TODO: this hasn't been tuned yet
- 50000 * get_cache_factor_for("stateGroupCache"),
- )
- self._state_group_members_cache = DictionaryCache(
- "*stateGroupMembersCache*",
- 500000 * get_cache_factor_for("stateGroupMembersCache"),
- )
-
- @defer.inlineCallbacks
- def get_room_version(self, room_id):
- """Get the room_version of a given room
-
- Args:
- room_id (str)
-
- Returns:
- Deferred[str]
-
- Raises:
- NotFoundError if the room is unknown
- """
- # for now we do this by looking at the create event. We may want to cache this
- # more intelligently in future.
-
- # Retrieve the room's create event
- create_event = yield self.get_create_event_for_room(room_id)
- return create_event.content.get("room_version", "1")
-
- @defer.inlineCallbacks
- def get_room_predecessor(self, room_id):
- """Get the predecessor room of an upgraded room if one exists.
- Otherwise return None.
-
- Args:
- room_id (str)
-
- Returns:
- Deferred[unicode|None]: predecessor room id
-
- Raises:
- NotFoundError if the room is unknown
- """
- # Retrieve the room's create event
- create_event = yield self.get_create_event_for_room(room_id)
-
- # Return predecessor if present
- return create_event.content.get("predecessor", None)
-
- @defer.inlineCallbacks
- def get_create_event_for_room(self, room_id):
- """Get the create state event for a room.
-
- Args:
- room_id (str)
-
- Returns:
- Deferred[EventBase]: The room creation event.
-
- Raises:
- NotFoundError if the room is unknown
- """
- state_ids = yield self.get_current_state_ids(room_id)
- create_id = state_ids.get((EventTypes.Create, ""))
-
- # If we can't find the create event, assume we've hit a dead end
- if not create_id:
- raise NotFoundError("Unknown room %s" % (room_id))
-
- # Retrieve the room's create event and return
- create_event = yield self.get_event(create_id)
- return create_event
-
- @cached(max_entries=100000, iterable=True)
- def get_current_state_ids(self, room_id):
- """Get the current state event ids for a room based on the
- current_state_events table.
-
- Args:
- room_id (str)
-
- Returns:
- deferred: dict of (type, state_key) -> event_id
- """
-
- def _get_current_state_ids_txn(txn):
- txn.execute(
- """SELECT type, state_key, event_id FROM current_state_events
- WHERE room_id = ?
- """,
- (room_id,),
- )
-
- return {
- (intern_string(r[0]), intern_string(r[1])): to_ascii(r[2]) for r in txn
- }
-
- return self.runInteraction("get_current_state_ids", _get_current_state_ids_txn)
-
- # FIXME: how should this be cached?
- def get_filtered_current_state_ids(self, room_id, state_filter=StateFilter.all()):
- """Get the current state event of a given type for a room based on the
- current_state_events table. This may not be as up-to-date as the result
- of doing a fresh state resolution as per state_handler.get_current_state
-
- Args:
- room_id (str)
- state_filter (StateFilter): The state filter used to fetch state
- from the database.
-
- Returns:
- Deferred[dict[tuple[str, str], str]]: Map from type/state_key to
- event ID.
- """
-
- where_clause, where_args = state_filter.make_sql_filter_clause()
-
- if not where_clause:
- # We delegate to the cached version
- return self.get_current_state_ids(room_id)
-
- def _get_filtered_current_state_ids_txn(txn):
- results = {}
- sql = """
- SELECT type, state_key, event_id FROM current_state_events
- WHERE room_id = ?
- """
-
- if where_clause:
- sql += " AND (%s)" % (where_clause,)
-
- args = [room_id]
- args.extend(where_args)
- txn.execute(sql, args)
- for row in txn:
- typ, state_key, event_id = row
- key = (intern_string(typ), intern_string(state_key))
- results[key] = event_id
-
- return results
-
- return self.runInteraction(
- "get_filtered_current_state_ids", _get_filtered_current_state_ids_txn
- )
-
- @defer.inlineCallbacks
- def get_canonical_alias_for_room(self, room_id):
- """Get canonical alias for room, if any
-
- Args:
- room_id (str)
-
- Returns:
- Deferred[str|None]: The canonical alias, if any
- """
-
- state = yield self.get_filtered_current_state_ids(
- room_id, StateFilter.from_types([(EventTypes.CanonicalAlias, "")])
- )
-
- event_id = state.get((EventTypes.CanonicalAlias, ""))
- if not event_id:
- return
-
- event = yield self.get_event(event_id, allow_none=True)
- if not event:
- return
-
- return event.content.get("canonical_alias")
-
- @cached(max_entries=10000, iterable=True)
- def get_state_group_delta(self, state_group):
- """Given a state group try to return a previous group and a delta between
- the old and the new.
-
- Returns:
- (prev_group, delta_ids), where both may be None.
- """
-
- def _get_state_group_delta_txn(txn):
- prev_group = self._simple_select_one_onecol_txn(
- txn,
- table="state_group_edges",
- keyvalues={"state_group": state_group},
- retcol="prev_state_group",
- allow_none=True,
- )
-
- if not prev_group:
- return _GetStateGroupDelta(None, None)
-
- delta_ids = self._simple_select_list_txn(
- txn,
- table="state_groups_state",
- keyvalues={"state_group": state_group},
- retcols=("type", "state_key", "event_id"),
- )
-
- return _GetStateGroupDelta(
- prev_group,
- {(row["type"], row["state_key"]): row["event_id"] for row in delta_ids},
- )
-
- return self.runInteraction("get_state_group_delta", _get_state_group_delta_txn)
-
- @defer.inlineCallbacks
- def get_state_groups_ids(self, _room_id, event_ids):
- """Get the event IDs of all the state for the state groups for the given events
-
- Args:
- _room_id (str): id of the room for these events
- event_ids (iterable[str]): ids of the events
-
- Returns:
- Deferred[dict[int, dict[tuple[str, str], str]]]:
- dict of state_group_id -> (dict of (type, state_key) -> event id)
- """
- if not event_ids:
- return {}
-
- event_to_groups = yield self._get_state_group_for_events(event_ids)
-
- groups = set(itervalues(event_to_groups))
- group_to_state = yield self._get_state_for_groups(groups)
-
- return group_to_state
-
- @defer.inlineCallbacks
- def get_state_ids_for_group(self, state_group):
- """Get the event IDs of all the state in the given state group
-
- Args:
- state_group (int)
-
- Returns:
- Deferred[dict]: Resolves to a map of (type, state_key) -> event_id
- """
- group_to_state = yield self._get_state_for_groups((state_group,))
-
- return group_to_state[state_group]
-
- @defer.inlineCallbacks
- def get_state_groups(self, room_id, event_ids):
- """ Get the state groups for the given list of event_ids
-
- Returns:
- Deferred[dict[int, list[EventBase]]]:
- dict of state_group_id -> list of state events.
- """
- if not event_ids:
- return {}
-
- group_to_ids = yield self.get_state_groups_ids(room_id, event_ids)
-
- state_event_map = yield self.get_events(
- [
- ev_id
- for group_ids in itervalues(group_to_ids)
- for ev_id in itervalues(group_ids)
- ],
- get_prev_content=False,
- )
-
- return {
- group: [
- state_event_map[v]
- for v in itervalues(event_id_map)
- if v in state_event_map
- ]
- for group, event_id_map in iteritems(group_to_ids)
- }
-
- @defer.inlineCallbacks
- def _get_state_groups_from_groups(self, groups, state_filter):
- """Returns the state groups for a given set of groups, filtering on
- types of state events.
-
- Args:
- groups(list[int]): list of state group IDs to query
- state_filter (StateFilter): The state filter used to fetch state
- from the database.
- Returns:
- Deferred[dict[int, dict[tuple[str, str], str]]]:
- dict of state_group_id -> (dict of (type, state_key) -> event id)
- """
- results = {}
-
- chunks = [groups[i : i + 100] for i in range(0, len(groups), 100)]
- for chunk in chunks:
- res = yield self.runInteraction(
- "_get_state_groups_from_groups",
- self._get_state_groups_from_groups_txn,
- chunk,
- state_filter,
- )
- results.update(res)
-
- return results
-
- @defer.inlineCallbacks
- def get_state_for_events(self, event_ids, state_filter=StateFilter.all()):
- """Given a list of event_ids and type tuples, return a list of state
- dicts for each event.
-
- Args:
- event_ids (list[string])
- state_filter (StateFilter): The state filter used to fetch state
- from the database.
-
- Returns:
- deferred: A dict of (event_id) -> (type, state_key) -> [state_events]
- """
- event_to_groups = yield self._get_state_group_for_events(event_ids)
-
- groups = set(itervalues(event_to_groups))
- group_to_state = yield self._get_state_for_groups(groups, state_filter)
-
- state_event_map = yield self.get_events(
- [ev_id for sd in itervalues(group_to_state) for ev_id in itervalues(sd)],
- get_prev_content=False,
- )
-
- event_to_state = {
- event_id: {
- k: state_event_map[v]
- for k, v in iteritems(group_to_state[group])
- if v in state_event_map
- }
- for event_id, group in iteritems(event_to_groups)
- }
-
- return {event: event_to_state[event] for event in event_ids}
-
- @defer.inlineCallbacks
- def get_state_ids_for_events(self, event_ids, state_filter=StateFilter.all()):
- """
- Get the state dicts corresponding to a list of events, containing the event_ids
- of the state events (as opposed to the events themselves)
-
- Args:
- event_ids(list(str)): events whose state should be returned
- state_filter (StateFilter): The state filter used to fetch state
- from the database.
-
- Returns:
- A deferred dict from event_id -> (type, state_key) -> event_id
- """
- event_to_groups = yield self._get_state_group_for_events(event_ids)
-
- groups = set(itervalues(event_to_groups))
- group_to_state = yield self._get_state_for_groups(groups, state_filter)
-
- event_to_state = {
- event_id: group_to_state[group]
- for event_id, group in iteritems(event_to_groups)
- }
-
- return {event: event_to_state[event] for event in event_ids}
-
- @defer.inlineCallbacks
- def get_state_for_event(self, event_id, state_filter=StateFilter.all()):
- """
- Get the state dict corresponding to a particular event
-
- Args:
- event_id(str): event whose state should be returned
- state_filter (StateFilter): The state filter used to fetch state
- from the database.
-
- Returns:
- A deferred dict from (type, state_key) -> state_event
- """
- state_map = yield self.get_state_for_events([event_id], state_filter)
- return state_map[event_id]
-
- @defer.inlineCallbacks
- def get_state_ids_for_event(self, event_id, state_filter=StateFilter.all()):
- """
- Get the state dict corresponding to a particular event
-
- Args:
- event_id(str): event whose state should be returned
- state_filter (StateFilter): The state filter used to fetch state
- from the database.
-
- Returns:
- A deferred dict from (type, state_key) -> state_event
- """
- state_map = yield self.get_state_ids_for_events([event_id], state_filter)
- return state_map[event_id]
-
- @cached(max_entries=50000)
- def _get_state_group_for_event(self, event_id):
- return self._simple_select_one_onecol(
- table="event_to_state_groups",
- keyvalues={"event_id": event_id},
- retcol="state_group",
- allow_none=True,
- desc="_get_state_group_for_event",
- )
-
- @cachedList(
- cached_method_name="_get_state_group_for_event",
- list_name="event_ids",
- num_args=1,
- inlineCallbacks=True,
- )
- def _get_state_group_for_events(self, event_ids):
- """Returns mapping event_id -> state_group
- """
- rows = yield self._simple_select_many_batch(
- table="event_to_state_groups",
- column="event_id",
- iterable=event_ids,
- keyvalues={},
- retcols=("event_id", "state_group"),
- desc="_get_state_group_for_events",
- )
-
- return {row["event_id"]: row["state_group"] for row in rows}
-
- def _get_state_for_group_using_cache(self, cache, group, state_filter):
- """Checks if group is in cache. See `_get_state_for_groups`
-
- Args:
- cache(DictionaryCache): the state group cache to use
- group(int): The state group to lookup
- state_filter (StateFilter): The state filter used to fetch state
- from the database.
-
- Returns 2-tuple (`state_dict`, `got_all`).
- `got_all` is a bool indicating if we successfully retrieved all
- requests state from the cache, if False we need to query the DB for the
- missing state.
- """
- is_all, known_absent, state_dict_ids = cache.get(group)
-
- if is_all or state_filter.is_full():
- # Either we have everything or want everything, either way
- # `is_all` tells us whether we've gotten everything.
- return state_filter.filter_state(state_dict_ids), is_all
-
- # tracks whether any of our requested types are missing from the cache
- missing_types = False
-
- if state_filter.has_wildcards():
- # We don't know if we fetched all the state keys for the types in
- # the filter that are wildcards, so we have to assume that we may
- # have missed some.
- missing_types = True
- else:
- # There aren't any wild cards, so `concrete_types()` returns the
- # complete list of event types we're wanting.
- for key in state_filter.concrete_types():
- if key not in state_dict_ids and key not in known_absent:
- missing_types = True
- break
-
- return state_filter.filter_state(state_dict_ids), not missing_types
-
- @defer.inlineCallbacks
- def _get_state_for_groups(self, groups, state_filter=StateFilter.all()):
- """Gets the state at each of a list of state groups, optionally
- filtering by type/state_key
-
- Args:
- groups (iterable[int]): list of state groups for which we want
- to get the state.
- state_filter (StateFilter): The state filter used to fetch state
- from the database.
- Returns:
- Deferred[dict[int, dict[tuple[str, str], str]]]:
- dict of state_group_id -> (dict of (type, state_key) -> event id)
- """
-
- member_filter, non_member_filter = state_filter.get_member_split()
-
- # Now we look them up in the member and non-member caches
- non_member_state, incomplete_groups_nm, = (
- yield self._get_state_for_groups_using_cache(
- groups, self._state_group_cache, state_filter=non_member_filter
- )
- )
-
- member_state, incomplete_groups_m, = (
- yield self._get_state_for_groups_using_cache(
- groups, self._state_group_members_cache, state_filter=member_filter
- )
- )
-
- state = dict(non_member_state)
- for group in groups:
- state[group].update(member_state[group])
-
- # Now fetch any missing groups from the database
-
- incomplete_groups = incomplete_groups_m | incomplete_groups_nm
-
- if not incomplete_groups:
- return state
-
- cache_sequence_nm = self._state_group_cache.sequence
- cache_sequence_m = self._state_group_members_cache.sequence
-
- # Help the cache hit ratio by expanding the filter a bit
- db_state_filter = state_filter.return_expanded()
-
- group_to_state_dict = yield self._get_state_groups_from_groups(
- list(incomplete_groups), state_filter=db_state_filter
- )
-
- # Now lets update the caches
- self._insert_into_cache(
- group_to_state_dict,
- db_state_filter,
- cache_seq_num_members=cache_sequence_m,
- cache_seq_num_non_members=cache_sequence_nm,
- )
-
- # And finally update the result dict, by filtering out any extra
- # stuff we pulled out of the database.
- for group, group_state_dict in iteritems(group_to_state_dict):
- # We just replace any existing entries, as we will have loaded
- # everything we need from the database anyway.
- state[group] = state_filter.filter_state(group_state_dict)
-
- return state
-
- def _get_state_for_groups_using_cache(self, groups, cache, state_filter):
- """Gets the state at each of a list of state groups, optionally
- filtering by type/state_key, querying from a specific cache.
-
- Args:
- groups (iterable[int]): list of state groups for which we want
- to get the state.
- cache (DictionaryCache): the cache of group ids to state dicts which
- we will pass through - either the normal state cache or the specific
- members state cache.
- state_filter (StateFilter): The state filter used to fetch state
- from the database.
-
- Returns:
- tuple[dict[int, dict[tuple[str, str], str]], set[int]]: Tuple of
- dict of state_group_id -> (dict of (type, state_key) -> event id)
- of entries in the cache, and the state group ids either missing
- from the cache or incomplete.
- """
- results = {}
- incomplete_groups = set()
- for group in set(groups):
- state_dict_ids, got_all = self._get_state_for_group_using_cache(
- cache, group, state_filter
- )
- results[group] = state_dict_ids
-
- if not got_all:
- incomplete_groups.add(group)
-
- return results, incomplete_groups
-
- def _insert_into_cache(
- self,
- group_to_state_dict,
- state_filter,
- cache_seq_num_members,
- cache_seq_num_non_members,
- ):
- """Inserts results from querying the database into the relevant cache.
-
- Args:
- group_to_state_dict (dict): The new entries pulled from database.
- Map from state group to state dict
- state_filter (StateFilter): The state filter used to fetch state
- from the database.
- cache_seq_num_members (int): Sequence number of member cache since
- last lookup in cache
- cache_seq_num_non_members (int): Sequence number of member cache since
- last lookup in cache
- """
-
- # We need to work out which types we've fetched from the DB for the
- # member vs non-member caches. This should be as accurate as possible,
- # but can be an underestimate (e.g. when we have wild cards)
-
- member_filter, non_member_filter = state_filter.get_member_split()
- if member_filter.is_full():
- # We fetched all member events
- member_types = None
- else:
- # `concrete_types()` will only return a subset when there are wild
- # cards in the filter, but that's fine.
- member_types = member_filter.concrete_types()
-
- if non_member_filter.is_full():
- # We fetched all non member events
- non_member_types = None
- else:
- non_member_types = non_member_filter.concrete_types()
-
- for group, group_state_dict in iteritems(group_to_state_dict):
- state_dict_members = {}
- state_dict_non_members = {}
-
- for k, v in iteritems(group_state_dict):
- if k[0] == EventTypes.Member:
- state_dict_members[k] = v
- else:
- state_dict_non_members[k] = v
-
- self._state_group_members_cache.update(
- cache_seq_num_members,
- key=group,
- value=state_dict_members,
- fetched_keys=member_types,
- )
-
- self._state_group_cache.update(
- cache_seq_num_non_members,
- key=group,
- value=state_dict_non_members,
- fetched_keys=non_member_types,
- )
-
- def store_state_group(
- self, event_id, room_id, prev_group, delta_ids, current_state_ids
- ):
- """Store a new set of state, returning a newly assigned state group.
-
- Args:
- event_id (str): The event ID for which the state was calculated
- room_id (str)
- prev_group (int|None): A previous state group for the room, optional.
- delta_ids (dict|None): The delta between state at `prev_group` and
- `current_state_ids`, if `prev_group` was given. Same format as
- `current_state_ids`.
- current_state_ids (dict): The state to store. Map of (type, state_key)
- to event_id.
-
- Returns:
- Deferred[int]: The state group ID
- """
-
- def _store_state_group_txn(txn):
- if current_state_ids is None:
- # AFAIK, this can never happen
- raise Exception("current_state_ids cannot be None")
-
- state_group = self.database_engine.get_next_state_group_id(txn)
-
- self._simple_insert_txn(
- txn,
- table="state_groups",
- values={"id": state_group, "room_id": room_id, "event_id": event_id},
- )
-
- # We persist as a delta if we can, while also ensuring the chain
- # of deltas isn't tooo long, as otherwise read performance degrades.
- if prev_group:
- is_in_db = self._simple_select_one_onecol_txn(
- txn,
- table="state_groups",
- keyvalues={"id": prev_group},
- retcol="id",
- allow_none=True,
- )
- if not is_in_db:
- raise Exception(
- "Trying to persist state with unpersisted prev_group: %r"
- % (prev_group,)
- )
-
- potential_hops = self._count_state_group_hops_txn(txn, prev_group)
- if prev_group and potential_hops < MAX_STATE_DELTA_HOPS:
- self._simple_insert_txn(
- txn,
- table="state_group_edges",
- values={"state_group": state_group, "prev_state_group": prev_group},
- )
-
- self._simple_insert_many_txn(
- txn,
- table="state_groups_state",
- values=[
- {
- "state_group": state_group,
- "room_id": room_id,
- "type": key[0],
- "state_key": key[1],
- "event_id": state_id,
- }
- for key, state_id in iteritems(delta_ids)
- ],
- )
- else:
- self._simple_insert_many_txn(
- txn,
- table="state_groups_state",
- values=[
- {
- "state_group": state_group,
- "room_id": room_id,
- "type": key[0],
- "state_key": key[1],
- "event_id": state_id,
- }
- for key, state_id in iteritems(current_state_ids)
- ],
- )
-
- # Prefill the state group caches with this group.
- # It's fine to use the sequence like this as the state group map
- # is immutable. (If the map wasn't immutable then this prefill could
- # race with another update)
-
- current_member_state_ids = {
- s: ev
- for (s, ev) in iteritems(current_state_ids)
- if s[0] == EventTypes.Member
- }
- txn.call_after(
- self._state_group_members_cache.update,
- self._state_group_members_cache.sequence,
- key=state_group,
- value=dict(current_member_state_ids),
- )
-
- current_non_member_state_ids = {
- s: ev
- for (s, ev) in iteritems(current_state_ids)
- if s[0] != EventTypes.Member
- }
- txn.call_after(
- self._state_group_cache.update,
- self._state_group_cache.sequence,
- key=state_group,
- value=dict(current_non_member_state_ids),
- )
-
- return state_group
-
- return self.runInteraction("store_state_group", _store_state_group_txn)
-
-
-class StateBackgroundUpdateStore(
- StateGroupBackgroundUpdateStore, BackgroundUpdateStore
-):
-
- STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication"
- STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index"
- CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx"
- EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index"
-
- def __init__(self, db_conn, hs):
- super(StateBackgroundUpdateStore, self).__init__(db_conn, hs)
- self.register_background_update_handler(
- self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME,
- self._background_deduplicate_state,
- )
- self.register_background_update_handler(
- self.STATE_GROUP_INDEX_UPDATE_NAME, self._background_index_state
- )
- self.register_background_index_update(
- self.CURRENT_STATE_INDEX_UPDATE_NAME,
- index_name="current_state_events_member_index",
- table="current_state_events",
- columns=["state_key"],
- where_clause="type='m.room.member'",
- )
- self.register_background_index_update(
- self.EVENT_STATE_GROUP_INDEX_UPDATE_NAME,
- index_name="event_to_state_groups_sg_index",
- table="event_to_state_groups",
- columns=["state_group"],
- )
-
- @defer.inlineCallbacks
- def _background_deduplicate_state(self, progress, batch_size):
- """This background update will slowly deduplicate state by reencoding
- them as deltas.
- """
- last_state_group = progress.get("last_state_group", 0)
- rows_inserted = progress.get("rows_inserted", 0)
- max_group = progress.get("max_group", None)
-
- BATCH_SIZE_SCALE_FACTOR = 100
-
- batch_size = max(1, int(batch_size / BATCH_SIZE_SCALE_FACTOR))
-
- if max_group is None:
- rows = yield self._execute(
- "_background_deduplicate_state",
- None,
- "SELECT coalesce(max(id), 0) FROM state_groups",
- )
- max_group = rows[0][0]
-
- def reindex_txn(txn):
- new_last_state_group = last_state_group
- for count in range(batch_size):
- txn.execute(
- "SELECT id, room_id FROM state_groups"
- " WHERE ? < id AND id <= ?"
- " ORDER BY id ASC"
- " LIMIT 1",
- (new_last_state_group, max_group),
- )
- row = txn.fetchone()
- if row:
- state_group, room_id = row
-
- if not row or not state_group:
- return True, count
-
- txn.execute(
- "SELECT state_group FROM state_group_edges"
- " WHERE state_group = ?",
- (state_group,),
- )
-
- # If we reach a point where we've already started inserting
- # edges we should stop.
- if txn.fetchall():
- return True, count
-
- txn.execute(
- "SELECT coalesce(max(id), 0) FROM state_groups"
- " WHERE id < ? AND room_id = ?",
- (state_group, room_id),
- )
- prev_group, = txn.fetchone()
- new_last_state_group = state_group
-
- if prev_group:
- potential_hops = self._count_state_group_hops_txn(txn, prev_group)
- if potential_hops >= MAX_STATE_DELTA_HOPS:
- # We want to ensure chains are at most this long,#
- # otherwise read performance degrades.
- continue
-
- prev_state = self._get_state_groups_from_groups_txn(
- txn, [prev_group]
- )
- prev_state = prev_state[prev_group]
-
- curr_state = self._get_state_groups_from_groups_txn(
- txn, [state_group]
- )
- curr_state = curr_state[state_group]
-
- if not set(prev_state.keys()) - set(curr_state.keys()):
- # We can only do a delta if the current has a strict super set
- # of keys
-
- delta_state = {
- key: value
- for key, value in iteritems(curr_state)
- if prev_state.get(key, None) != value
- }
-
- self._simple_delete_txn(
- txn,
- table="state_group_edges",
- keyvalues={"state_group": state_group},
- )
-
- self._simple_insert_txn(
- txn,
- table="state_group_edges",
- values={
- "state_group": state_group,
- "prev_state_group": prev_group,
- },
- )
-
- self._simple_delete_txn(
- txn,
- table="state_groups_state",
- keyvalues={"state_group": state_group},
- )
-
- self._simple_insert_many_txn(
- txn,
- table="state_groups_state",
- values=[
- {
- "state_group": state_group,
- "room_id": room_id,
- "type": key[0],
- "state_key": key[1],
- "event_id": state_id,
- }
- for key, state_id in iteritems(delta_state)
- ],
- )
-
- progress = {
- "last_state_group": state_group,
- "rows_inserted": rows_inserted + batch_size,
- "max_group": max_group,
- }
-
- self._background_update_progress_txn(
- txn, self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME, progress
- )
-
- return False, batch_size
-
- finished, result = yield self.runInteraction(
- self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME, reindex_txn
- )
-
- if finished:
- yield self._end_background_update(
- self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME
- )
-
- return result * BATCH_SIZE_SCALE_FACTOR
-
- @defer.inlineCallbacks
- def _background_index_state(self, progress, batch_size):
- def reindex_txn(conn):
- conn.rollback()
- if isinstance(self.database_engine, PostgresEngine):
- # postgres insists on autocommit for the index
- conn.set_session(autocommit=True)
- try:
- txn = conn.cursor()
- txn.execute(
- "CREATE INDEX CONCURRENTLY state_groups_state_type_idx"
- " ON state_groups_state(state_group, type, state_key)"
- )
- txn.execute("DROP INDEX IF EXISTS state_groups_state_id")
- finally:
- conn.set_session(autocommit=False)
- else:
- txn = conn.cursor()
- txn.execute(
- "CREATE INDEX state_groups_state_type_idx"
- " ON state_groups_state(state_group, type, state_key)"
- )
- txn.execute("DROP INDEX IF EXISTS state_groups_state_id")
-
- yield self.runWithConnection(reindex_txn)
-
- yield self._end_background_update(self.STATE_GROUP_INDEX_UPDATE_NAME)
-
- return 1
-
-
-class StateStore(StateGroupWorkerStore, StateBackgroundUpdateStore):
- """ Keeps track of the state at a given event.
-
- This is done by the concept of `state groups`. Every event is a assigned
- a state group (identified by an arbitrary string), which references a
- collection of state events. The current state of an event is then the
- collection of state events referenced by the event's state group.
-
- Hence, every change in the current state causes a new state group to be
- generated. However, if no change happens (e.g., if we get a message event
- with only one parent it inherits the state group from its parent.)
-
- There are three tables:
- * `state_groups`: Stores group name, first event with in the group and
- room id.
- * `event_to_state_groups`: Maps events to state groups.
- * `state_groups_state`: Maps state group to state events.
- """
-
- def __init__(self, db_conn, hs):
- super(StateStore, self).__init__(db_conn, hs)
-
- def _store_event_state_mappings_txn(self, txn, events_and_contexts):
- state_groups = {}
- for event, context in events_and_contexts:
- if event.internal_metadata.is_outlier():
- continue
-
- # if the event was rejected, just give it the same state as its
- # predecessor.
- if context.rejected:
- state_groups[event.event_id] = context.prev_group
- continue
-
- state_groups[event.event_id] = context.state_group
-
- self._simple_insert_many_txn(
- txn,
- table="event_to_state_groups",
- values=[
- {"state_group": state_group_id, "event_id": event_id}
- for event_id, state_group_id in iteritems(state_groups)
- ],
- )
-
- for event_id, state_group_id in iteritems(state_groups):
- txn.call_after(
- self._get_state_group_for_event.prefill, (event_id,), state_group_id
- )
diff --git a/tests/handlers/test_stats.py b/tests/handlers/test_stats.py
index 7569b6fab5..d5c8bd7612 100644
--- a/tests/handlers/test_stats.py
+++ b/tests/handlers/test_stats.py
@@ -13,9 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from synapse import storage
from synapse.rest import admin
from synapse.rest.client.v1 import login, room
+from synapse.storage.data_stores.main import stats
from tests import unittest
@@ -87,10 +87,10 @@ class StatsRoomTests(unittest.HomeserverTestCase):
)
def _get_current_stats(self, stats_type, stat_id):
- table, id_col = storage.stats.TYPE_TO_TABLE[stats_type]
+ table, id_col = stats.TYPE_TO_TABLE[stats_type]
- cols = list(storage.stats.ABSOLUTE_STATS_FIELDS[stats_type]) + list(
- storage.stats.PER_SLICE_FIELDS[stats_type]
+ cols = list(stats.ABSOLUTE_STATS_FIELDS[stats_type]) + list(
+ stats.PER_SLICE_FIELDS[stats_type]
)
end_ts = self.store.quantise_stats_time(self.reactor.seconds() * 1000)
diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py
index 622b16a071..dfeea24599 100644
--- a/tests/storage/test_appservice.py
+++ b/tests/storage/test_appservice.py
@@ -24,7 +24,7 @@ from twisted.internet import defer
from synapse.appservice import ApplicationService, ApplicationServiceState
from synapse.config._base import ConfigError
-from synapse.storage.appservice import (
+from synapse.storage.data_stores.main.appservice import (
ApplicationServiceStore,
ApplicationServiceTransactionStore,
)
diff --git a/tests/storage/test_cleanup_extrems.py b/tests/storage/test_cleanup_extrems.py
index 34f9c72709..69dcaa63d5 100644
--- a/tests/storage/test_cleanup_extrems.py
+++ b/tests/storage/test_cleanup_extrems.py
@@ -50,6 +50,8 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
schema_path = os.path.join(
prepare_database.dir_path,
+ "data_stores",
+ "main",
"schema",
"delta",
"54",
diff --git a/tests/storage/test_profile.py b/tests/storage/test_profile.py
index 45824bd3b2..24c7fe16c3 100644
--- a/tests/storage/test_profile.py
+++ b/tests/storage/test_profile.py
@@ -16,7 +16,7 @@
from twisted.internet import defer
-from synapse.storage.profile import ProfileStore
+from synapse.storage.data_stores.main.profile import ProfileStore
from synapse.types import UserID
from tests import unittest
diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py
index d7d244ce97..7eea57c0e2 100644
--- a/tests/storage/test_user_directory.py
+++ b/tests/storage/test_user_directory.py
@@ -15,7 +15,7 @@
from twisted.internet import defer
-from synapse.storage import UserDirectoryStore
+from synapse.storage.data_stores.main.user_directory import UserDirectoryStore
from tests import unittest
from tests.utils import setup_test_homeserver
|