diff options
author | Amber Brown <hawkowl@atleastfornow.net> | 2019-03-27 02:49:28 +1100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-03-27 02:49:28 +1100 |
commit | 903f04c21fec3bbc1337bc9552f223e17b87f6bf (patch) | |
tree | 9e6e9fd12d4c8c293605126b3eaf7a14035e96e6 | |
parent | Make federation endpoints more tolerant of trailing slashes v2 (#4935) (diff) | |
download | synapse-903f04c21fec3bbc1337bc9552f223e17b87f6bf.tar.xz |
Use the state event amount for userdir import batching, not room count (#4944)
Diffstat (limited to '')
-rw-r--r-- | changelog.d/4944.feature | 1 | ||||
-rw-r--r-- | synapse/storage/user_directory.py | 28 |
2 files changed, 21 insertions, 8 deletions
diff --git a/changelog.d/4944.feature b/changelog.d/4944.feature new file mode 100644 index 0000000000..8f792b8890 --- /dev/null +++ b/changelog.d/4944.feature @@ -0,0 +1 @@ +The user directory has been rewritten to make it faster, with less chance of falling behind on a large server. diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 65bdb1b4a5..4d60a5726f 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -135,7 +135,12 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): @defer.inlineCallbacks def _populate_user_directory_process_rooms(self, progress, batch_size): - + """ + Args: + progress (dict) + batch_size (int): Maximum number of state events to process + per cycle. + """ state = self.hs.get_state_handler() # If we don't have progress filed, delete everything. @@ -143,13 +148,14 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): yield self.delete_all_from_user_dir() def _get_next_batch(txn): + # Only fetch 250 rooms, so we don't fetch too many at once, even + # if those 250 rooms have less than batch_size state events. sql = """ - SELECT room_id FROM %s + SELECT room_id, events FROM %s ORDER BY events DESC - LIMIT %s + LIMIT 250 """ % ( TEMP_TABLE + "_rooms", - str(batch_size), ) txn.execute(sql) rooms_to_work_on = txn.fetchall() @@ -157,8 +163,6 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): if not rooms_to_work_on: return None - rooms_to_work_on = [x[0] for x in rooms_to_work_on] - # Get how many are left to process, so we can give status on how # far we are in processing txn.execute("SELECT COUNT(*) FROM " + TEMP_TABLE + "_rooms") @@ -180,7 +184,9 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): % (len(rooms_to_work_on), progress["remaining"]) ) - for room_id in rooms_to_work_on: + processed_event_count = 0 + + for room_id, event_count in rooms_to_work_on: is_in_room = yield self.is_host_joined(room_id, self.server_name) if is_in_room: @@ -247,7 +253,13 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): progress, ) - defer.returnValue(len(rooms_to_work_on)) + processed_event_count += event_count + + if processed_event_count > batch_size: + # Don't process any more rooms, we've hit our batch size. + defer.returnValue(processed_event_count) + + defer.returnValue(processed_event_count) @defer.inlineCallbacks def _populate_user_directory_process_users(self, progress, batch_size): |