diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index 4415909414..4d1b92d1aa 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -24,7 +24,7 @@ from twisted.enterprise.adbapi import Connection
from synapse.logging.opentracing import log_kv, set_tag, trace
from synapse.storage._base import SQLBaseStore, db_to_json
-from synapse.storage.database import make_in_list_sql_clause
+from synapse.storage.database import DatabasePool, make_in_list_sql_clause
from synapse.storage.types import Cursor
from synapse.types import JsonDict
from synapse.util import json_encoder
@@ -33,6 +33,7 @@ from synapse.util.iterutils import batch_iter
if TYPE_CHECKING:
from synapse.handlers.e2e_keys import SignatureListItem
+ from synapse.server import HomeServer
@attr.s(slots=True)
@@ -47,7 +48,20 @@ class DeviceKeyLookupResult:
keys = attr.ib(type=Optional[JsonDict])
-class EndToEndKeyWorkerStore(SQLBaseStore):
+class EndToEndKeyBackgroundStore(SQLBaseStore):
+ def __init__(self, database: DatabasePool, db_conn: Connection, hs: "HomeServer"):
+ super().__init__(database, db_conn, hs)
+
+ self.db_pool.updates.register_background_index_update(
+ "e2e_cross_signing_keys_idx",
+ index_name="e2e_cross_signing_keys_stream_idx",
+ table="e2e_cross_signing_keys",
+ columns=["stream_id"],
+ unique=True,
+ )
+
+
+class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore):
async def get_e2e_device_keys_for_federation_query(
self, user_id: str
) -> Tuple[int, List[JsonDict]]:
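
The new EndToEndKeyBackgroundStore registers the `e2e_cross_signing_keys_idx` update here, and the schema delta later in this diff queues it in the `background_updates` table, so the index is built by the background updater after startup rather than during the schema migration itself. As a rough sketch (not the updater's actual code), the parameters passed to `register_background_index_update` describe an index of the following shape; whether PostgreSQL builds it CONCURRENTLY is an internal detail of the updater and an assumption here:

    # Illustrative only: the index described by the registration above.
    E2E_CROSS_SIGNING_KEYS_STREAM_IDX_SQL = """
        CREATE UNIQUE INDEX e2e_cross_signing_keys_stream_idx
            ON e2e_cross_signing_keys (stream_id)
    """
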
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index a6279a6c13..2e07c37340 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -26,6 +26,7 @@ from synapse.storage.databases.main.events_worker import EventsWorkerStore
from synapse.storage.databases.main.signatures import SignatureWorkerStore
from synapse.types import Collection
from synapse.util.caches.descriptors import cached
+from synapse.util.caches.lrucache import LruCache
from synapse.util.iterutils import batch_iter
logger = logging.getLogger(__name__)
@@ -40,6 +41,11 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
self._delete_old_forward_extrem_cache, 60 * 60 * 1000
)
+ # Cache of event ID to list of auth event IDs and their depths.
+ self._event_auth_cache = LruCache(
+ 500000, "_event_auth_cache", size_callback=len
+ ) # type: LruCache[str, List[Tuple[str, int]]]
+
async def get_auth_chain(
self, event_ids: Collection[str], include_given: bool = False
) -> List[EventBase]:
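
The new cache is sized with `size_callback=len`, so the 500,000 budget counts cached `(auth_event_id, depth)` pairs across all entries (and is presumably scaled by the configured cache factor, like other named caches) rather than counting keys; events with long auth lists cost proportionally more. A small standalone sketch of that behaviour, using made-up event IDs and a tiny budget:

    from synapse.util.caches.lrucache import LruCache

    # Illustrative only: a tiny cache with the same shape as _event_auth_cache.
    cache = LruCache(10, "example_auth_cache", size_callback=len)  # type: LruCache[str, list]

    cache.set("$event_a", [("$auth_1", 1), ("$auth_2", 2)])  # contributes 2 to the size budget
    cache.set("$event_b", [("$auth_3", 3)])  # contributes 1

    assert cache.get("$event_a") == [("$auth_1", 1), ("$auth_2", 2)]
    assert cache.get("$missing") is None  # a miss returns None by default
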
@@ -84,17 +90,45 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
else:
results = set()
- base_sql = "SELECT DISTINCT auth_id FROM event_auth WHERE "
+        # We pull out the depth only so that we can populate the
+        # `_event_auth_cache`.
+ base_sql = """
+ SELECT a.event_id, auth_id, depth
+ FROM event_auth AS a
+ INNER JOIN events AS e ON (e.event_id = a.auth_id)
+ WHERE
+ """
front = set(event_ids)
while front:
new_front = set()
for chunk in batch_iter(front, 100):
- clause, args = make_in_list_sql_clause(
- txn.database_engine, "event_id", chunk
- )
- txn.execute(base_sql + clause, args)
- new_front.update(r[0] for r in txn)
+ # Pull the auth events either from the cache or DB.
+                to_fetch = []  # type: List[str]  # Event IDs to fetch from DB
+ for event_id in chunk:
+ res = self._event_auth_cache.get(event_id)
+ if res is None:
+ to_fetch.append(event_id)
+ else:
+ new_front.update(auth_id for auth_id, depth in res)
+
+ if to_fetch:
+ clause, args = make_in_list_sql_clause(
+ txn.database_engine, "a.event_id", to_fetch
+ )
+ txn.execute(base_sql + clause, args)
+
+ # Note we need to batch up the results by event ID before
+ # adding to the cache.
+ to_cache = {}
+ for event_id, auth_event_id, auth_event_depth in txn:
+ to_cache.setdefault(event_id, []).append(
+ (auth_event_id, auth_event_depth)
+ )
+ new_front.add(auth_event_id)
+
+ for event_id, auth_events in to_cache.items():
+ self._event_auth_cache.set(event_id, auth_events)
new_front -= results
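
The loop above is a read-through cache: each event in the chunk is looked up in `_event_auth_cache`, only the misses are fetched from the database in a single batched query, and the fetched rows are grouped per event ID before being written back to the cache. A minimal, self-contained sketch of the same pattern, using hypothetical helper names rather than Synapse's actual code:

    from typing import Dict, Iterable, List, Tuple

    # Hypothetical stand-ins for the LruCache and the batched DB query used above.
    _cache = {}  # type: Dict[str, List[Tuple[str, int]]]

    def fetch_auth_events_from_db(event_ids):
        # type: (List[str]) -> List[Tuple[str, str, int]]
        """Pretend DB lookup returning (event_id, auth_event_id, depth) rows."""
        return []

    def auth_events_for(event_ids):
        # type: (Iterable[str]) -> Dict[str, List[Tuple[str, int]]]
        results = {}  # type: Dict[str, List[Tuple[str, int]]]
        to_fetch = []  # type: List[str]
        for event_id in event_ids:
            cached = _cache.get(event_id)
            if cached is None:
                to_fetch.append(event_id)  # cache miss: fetch from the DB
            else:
                results[event_id] = cached  # cache hit

        if to_fetch:
            to_cache = {}  # type: Dict[str, List[Tuple[str, int]]]
            for event_id, auth_event_id, depth in fetch_auth_events_from_db(to_fetch):
                # Batch rows up per event ID before caching, as in the txn above.
                to_cache.setdefault(event_id, []).append((auth_event_id, depth))
            _cache.update(to_cache)
            results.update(to_cache)

        return results
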
@@ -213,14 +247,38 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
break
# Fetch the auth events and their depths of the N last events we're
- # currently walking
+ # currently walking, either from cache or DB.
search, chunk = search[:-100], search[-100:]
- clause, args = make_in_list_sql_clause(
- txn.database_engine, "a.event_id", [e_id for _, e_id in chunk]
- )
- txn.execute(base_sql + clause, args)
- for event_id, auth_event_id, auth_event_depth in txn:
+            found = []  # type: List[Tuple[str, str, int]]  # Results found
+            to_fetch = []  # type: List[str]  # Event IDs to fetch from DB
+ for _, event_id in chunk:
+ res = self._event_auth_cache.get(event_id)
+ if res is None:
+ to_fetch.append(event_id)
+ else:
+ found.extend((event_id, auth_id, depth) for auth_id, depth in res)
+
+ if to_fetch:
+ clause, args = make_in_list_sql_clause(
+ txn.database_engine, "a.event_id", to_fetch
+ )
+ txn.execute(base_sql + clause, args)
+
+                # We parse the results and add them to the `found` list and the
+                # cache (note we need to batch up the results by event ID before
+                # adding to the cache).
+ to_cache = {}
+ for event_id, auth_event_id, auth_event_depth in txn:
+ to_cache.setdefault(event_id, []).append(
+ (auth_event_id, auth_event_depth)
+ )
+ found.append((event_id, auth_event_id, auth_event_depth))
+
+ for event_id, auth_events in to_cache.items():
+ self._event_auth_cache.set(event_id, auth_events)
+
+ for event_id, auth_event_id, auth_event_depth in found:
event_to_auth_events.setdefault(event_id, set()).add(auth_event_id)
sets = event_to_missing_sets.get(auth_event_id)
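
Both transactions walk the auth DAG breadth-first: `front` (or `search`) holds the events still to expand, each pass expands a batch of at most 100, and already-visited events are subtracted so the walk terminates. Auth chains within a room overlap heavily (the create, power-levels and membership events appear in most events' auth lists), which is why a per-event cache pays off both across iterations and across calls. A toy illustration of the breadth-first expansion, reusing an `auth_events_for` helper like the sketch above:

    from typing import Callable, Dict, Iterable, List, Set, Tuple

    def get_auth_chain_ids(event_ids, auth_events_for):
        # type: (Set[str], Callable[[Iterable[str]], Dict[str, List[Tuple[str, int]]]]) -> Set[str]
        """Breadth-first walk of the auth DAG, mirroring the front/new_front loop."""
        results = set()  # type: Set[str]
        front = set(event_ids)
        while front:
            new_front = set()  # type: Set[str]
            for auth_events in auth_events_for(front).values():
                new_front.update(auth_id for auth_id, _depth in auth_events)

            new_front -= results  # don't re-expand events we have already walked
            front = new_front
            results.update(front)
        return results
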
diff --git a/synapse/storage/databases/main/schema/delta/58/23e2e_cross_signing_keys_idx.sql b/synapse/storage/databases/main/schema/delta/58/23e2e_cross_signing_keys_idx.sql
new file mode 100644
index 0000000000..61c558db77
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/58/23e2e_cross_signing_keys_idx.sql
@@ -0,0 +1,17 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('e2e_cross_signing_keys_idx', '{}');
diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py
index 5beb302be3..0cdb3ec1f7 100644
--- a/synapse/storage/databases/main/stats.py
+++ b/synapse/storage/databases/main/stats.py
@@ -16,15 +16,18 @@
import logging
from collections import Counter
+from enum import Enum
from itertools import chain
from typing import Any, Dict, List, Optional, Tuple
from twisted.internet.defer import DeferredLock
from synapse.api.constants import EventTypes, Membership
+from synapse.api.errors import StoreError
from synapse.storage.database import DatabasePool
from synapse.storage.databases.main.state_deltas import StateDeltasStore
from synapse.storage.engines import PostgresEngine
+from synapse.types import JsonDict
from synapse.util.caches.descriptors import cached
logger = logging.getLogger(__name__)
@@ -59,6 +62,23 @@ TYPE_TO_TABLE = {"room": ("room_stats", "room_id"), "user": ("user_stats", "user
TYPE_TO_ORIGIN_TABLE = {"room": ("rooms", "room_id"), "user": ("users", "name")}
+class UserSortOrder(Enum):
+ """
+ Enum to define the sorting method used when returning users
+ with get_users_media_usage_paginate
+
+ MEDIA_LENGTH = ordered by size of uploaded media. Smallest to largest.
+ MEDIA_COUNT = ordered by number of uploaded media. Smallest to largest.
+ USER_ID = ordered alphabetically by `user_id`.
+    DISPLAYNAME = ordered alphabetically by `displayname`.
+ """
+
+ MEDIA_LENGTH = "media_length"
+ MEDIA_COUNT = "media_count"
+ USER_ID = "user_id"
+ DISPLAYNAME = "displayname"
+
+
class StatsStore(StateDeltasStore):
def __init__(self, database: DatabasePool, db_conn, hs):
super().__init__(database, db_conn, hs)
@@ -882,3 +902,110 @@ class StatsStore(StateDeltasStore):
complete_with_stream_id=pos,
absolute_field_overrides={"joined_rooms": joined_rooms},
)
+
+ async def get_users_media_usage_paginate(
+ self,
+ start: int,
+ limit: int,
+ from_ts: Optional[int] = None,
+ until_ts: Optional[int] = None,
+        order_by: Optional[str] = UserSortOrder.USER_ID.value,
+ direction: Optional[str] = "f",
+ search_term: Optional[str] = None,
+    ) -> Tuple[List[JsonDict], int]:
+        """Retrieve a paginated list of users and their uploaded local media
+        (size and number). Returns a list of user dicts and the total number
+        of users matching the filter criteria.
+
+ Args:
+ start: offset to begin the query from
+ limit: number of rows to retrieve
+            from_ts: request only media created on or after this timestamp (ms)
+            until_ts: request only media created on or before this timestamp (ms)
+ order_by: the sort order of the returned list
+ direction: sort ascending or descending
+ search_term: a string to filter user names by
+ Returns:
+            A list of user dicts and an integer representing the total number of
+            users matching the query.
+ """
+
+ def get_users_media_usage_paginate_txn(txn):
+ filters = []
+ args = [self.hs.config.server_name]
+
+ if search_term:
+ filters.append("(lmr.user_id LIKE ? OR displayname LIKE ?)")
+ args.extend(["@%" + search_term + "%:%", "%" + search_term + "%"])
+
+ if from_ts:
+ filters.append("created_ts >= ?")
+ args.extend([from_ts])
+ if until_ts:
+ filters.append("created_ts <= ?")
+ args.extend([until_ts])
+
+ # Set ordering
+ if UserSortOrder(order_by) == UserSortOrder.MEDIA_LENGTH:
+ order_by_column = "media_length"
+ elif UserSortOrder(order_by) == UserSortOrder.MEDIA_COUNT:
+ order_by_column = "media_count"
+ elif UserSortOrder(order_by) == UserSortOrder.USER_ID:
+ order_by_column = "lmr.user_id"
+ elif UserSortOrder(order_by) == UserSortOrder.DISPLAYNAME:
+ order_by_column = "displayname"
+ else:
+ raise StoreError(
+ 500, "Incorrect value for order_by provided: %s" % order_by
+ )
+
+ if direction == "b":
+ order = "DESC"
+ else:
+ order = "ASC"
+
+            where_clause = ("WHERE " + " AND ".join(filters)) if filters else ""
+
+ sql_base = """
+ FROM local_media_repository as lmr
+ LEFT JOIN profiles AS p ON lmr.user_id = '@' || p.user_id || ':' || ?
+ {}
+ GROUP BY lmr.user_id, displayname
+ """.format(
+ where_clause
+ )
+
+ # SQLite does not support SELECT COUNT(*) OVER()
+ sql = """
+ SELECT COUNT(*) FROM (
+ SELECT lmr.user_id
+ {sql_base}
+ ) AS count_user_ids
+ """.format(
+ sql_base=sql_base,
+ )
+ txn.execute(sql, args)
+ count = txn.fetchone()[0]
+
+ sql = """
+ SELECT
+ lmr.user_id,
+ displayname,
+ COUNT(lmr.user_id) as media_count,
+ SUM(media_length) as media_length
+ {sql_base}
+ ORDER BY {order_by_column} {order}
+ LIMIT ? OFFSET ?
+ """.format(
+ sql_base=sql_base, order_by_column=order_by_column, order=order,
+ )
+
+ args += [limit, start]
+ txn.execute(sql, args)
+ users = self.db_pool.cursor_to_dict(txn)
+
+ return users, count
+
+ return await self.db_pool.runInteraction(
+ "get_users_media_usage_paginate_txn", get_users_media_usage_paginate_txn
+ )
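
For context, a hedged usage sketch of the new method: the `store` handle and the surrounding coroutine are hypothetical (in Synapse this is driven from the admin API's media statistics endpoint), but the arguments and the returned shape follow the signature above.

    from synapse.storage.databases.main.stats import UserSortOrder

    async def print_top_uploaders(store):
        # `store` is assumed to be the main datastore, e.g. hs.get_datastore().
        users, total = await store.get_users_media_usage_paginate(
            start=0,
            limit=10,
            order_by=UserSortOrder.MEDIA_LENGTH.value,
            direction="b",  # "b" sorts descending, so the largest uploaders come first
        )
        print("total matching users:", total)
        for user in users:
            print(user["user_id"], user["media_count"], user["media_length"])
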