summary refs log tree commit diff
diff options
context:
space:
mode:
authorRichard van der Hoff <1389908+richvdh@users.noreply.github.com>2022-01-04 16:36:33 +0000
committerGitHub <noreply@github.com>2022-01-04 16:36:33 +0000
commitb38bdae3a2e5b7cfe862580368b996b8d7dfa50f (patch)
tree2ee349524c1fbe9e34ac6439dc5a6ff7ed39a570
parent`FederationClient.backfill`: stop flagging events as outliers (#11632) (diff)
downloadsynapse-b38bdae3a2e5b7cfe862580368b996b8d7dfa50f.tar.xz
Fix AssertionErrors after purging events (#11642)
* Fix AssertionErrors after purging events

If you purged a bunch of events from your database, and then restarted synapse
without receiving more events, then you would get a bunch of AssertionErrors on
restart.

This fixes the situation by rewinding the stream processors.

* `check-newsfragment`: ignore deleted newsfiles
-rw-r--r--changelog.d/11536.bugfix1
-rw-r--r--changelog.d/11536.misc1
-rw-r--r--changelog.d/11642.bugfix1
-rwxr-xr-xscripts-dev/check-newsfragment4
-rw-r--r--synapse/handlers/stats.py11
-rw-r--r--synapse/handlers/user_directory.py18
6 files changed, 30 insertions, 6 deletions
diff --git a/changelog.d/11536.bugfix b/changelog.d/11536.bugfix
new file mode 100644
index 0000000000..4a1b007254
--- /dev/null
+++ b/changelog.d/11536.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug which could cause `AssertionError`s to be written to the log when Synapse was restarted after purging events from the database.
diff --git a/changelog.d/11536.misc b/changelog.d/11536.misc
deleted file mode 100644
index b9191c111b..0000000000
--- a/changelog.d/11536.misc
+++ /dev/null
@@ -1 +0,0 @@
-Improvements to log messages around handling stream ids.
diff --git a/changelog.d/11642.bugfix b/changelog.d/11642.bugfix
new file mode 100644
index 0000000000..4a1b007254
--- /dev/null
+++ b/changelog.d/11642.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug which could cause `AssertionError`s to be written to the log when Synapse was restarted after purging events from the database.
diff --git a/scripts-dev/check-newsfragment b/scripts-dev/check-newsfragment
index af4de345df..c764011d6a 100755
--- a/scripts-dev/check-newsfragment
+++ b/scripts-dev/check-newsfragment
@@ -42,8 +42,8 @@ echo "--------------------------"
 echo
 
 matched=0
-for f in $(git diff --name-only FETCH_HEAD... -- changelog.d); do
-    # check that any modified newsfiles on this branch end with a full stop.
+for f in $(git diff --diff-filter=d --name-only FETCH_HEAD... -- changelog.d); do
+    # check that any added newsfiles on this branch end with a full stop.
     lastchar=$(tr -d '\n' < "$f" | tail -c 1)
     if [ "$lastchar" != '.' ] && [ "$lastchar" != '!' ]; then
         echo -e "\e[31mERROR: newsfragment $f does not end with a '.' or '!'\e[39m" >&2
diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py
index bd3e6f2ec7..29e41a4c79 100644
--- a/synapse/handlers/stats.py
+++ b/synapse/handlers/stats.py
@@ -80,6 +80,17 @@ class StatsHandler:
         # If self.pos is None then means we haven't fetched it from DB
         if self.pos is None:
             self.pos = await self.store.get_stats_positions()
+            room_max_stream_ordering = self.store.get_room_max_stream_ordering()
+            if self.pos > room_max_stream_ordering:
+                # apparently, we've processed more events than exist in the database!
+                # this can happen if events are removed with history purge or similar.
+                logger.warning(
+                    "Event stream ordering appears to have gone backwards (%i -> %i): "
+                    "rewinding stats processor",
+                    self.pos,
+                    room_max_stream_ordering,
+                )
+                self.pos = room_max_stream_ordering
 
         # Loop round handling deltas until we're up to date
 
diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index a0eb45446f..1565e034cb 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -148,9 +148,21 @@ class UserDirectoryHandler(StateDeltasHandler):
         if self.pos is None:
             self.pos = await self.store.get_user_directory_stream_pos()
 
-        # If still None then the initial background update hasn't happened yet.
-        if self.pos is None:
-            return None
+            # If still None then the initial background update hasn't happened yet.
+            if self.pos is None:
+                return None
+
+            room_max_stream_ordering = self.store.get_room_max_stream_ordering()
+            if self.pos > room_max_stream_ordering:
+                # apparently, we've processed more events than exist in the database!
+                # this can happen if events are removed with history purge or similar.
+                logger.warning(
+                    "Event stream ordering appears to have gone backwards (%i -> %i): "
+                    "rewinding user directory processor",
+                    self.pos,
+                    room_max_stream_ordering,
+                )
+                self.pos = room_max_stream_ordering
 
         # Loop round handling deltas until we're up to date
         while True: