diff options
author | Erik Johnston <erikj@element.io> | 2024-05-29 13:19:10 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-29 12:19:10 +0000 |
commit | 466f344547fc6bea2c43257dd65286380fbb512d (patch) | |
tree | 40ba5abd666ac6584b7d7cbae3603a3898ce91c5 /tests/storage | |
parent | Don't invalidate all `get_relations_for_event` on history purge (#17083) (diff) | |
download | synapse-466f344547fc6bea2c43257dd65286380fbb512d.tar.xz |
Move towards using `MultiWriterIdGenerator` everywhere (#17226)
There is a problem with `StreamIdGenerator` where it can go backwards over restarts when a stream ID is requested but then not inserted into the DB. This is problematic if we want to land #17215, and is generally a potential cause of all sorts of nastiness. Instead of trying to fix `StreamIdGenerator`, we may as well move to `MultiWriterIdGenerator`, which does not suffer from this problem (the latest positions are stored in the `stream_positions` table). This involves adding SQLite support to the class. This only changes ID generators that were already using `MultiWriterIdGenerator` under Postgres; a separate PR will move the rest of the uses of `StreamIdGenerator` over.
Diffstat (limited to 'tests/storage')
-rw-r--r-- | tests/storage/test_id_generators.py | 351 |
1 file changed, 187 insertions, 164 deletions
diff --git a/tests/storage/test_id_generators.py b/tests/storage/test_id_generators.py index 409d856ab9..fad9511cea 100644 --- a/tests/storage/test_id_generators.py +++ b/tests/storage/test_id_generators.py @@ -31,6 +31,11 @@ from synapse.storage.database import ( from synapse.storage.engines import IncorrectDatabaseSetup from synapse.storage.types import Cursor from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator +from synapse.storage.util.sequence import ( + LocalSequenceGenerator, + PostgresSequenceGenerator, + SequenceGenerator, +) from synapse.util import Clock from tests.unittest import HomeserverTestCase @@ -175,18 +180,22 @@ class StreamIdGeneratorTestCase(HomeserverTestCase): self.get_success(test_gen_next()) -class MultiWriterIdGeneratorTestCase(HomeserverTestCase): - if not USE_POSTGRES_FOR_TESTS: - skip = "Requires Postgres" - +class MultiWriterIdGeneratorBase(HomeserverTestCase): def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main self.db_pool: DatabasePool = self.store.db_pool self.get_success(self.db_pool.runInteraction("_setup_db", self._setup_db)) + if USE_POSTGRES_FOR_TESTS: + self.seq_gen: SequenceGenerator = PostgresSequenceGenerator("foobar_seq") + else: + self.seq_gen = LocalSequenceGenerator(lambda _: 0) + def _setup_db(self, txn: LoggingTransaction) -> None: - txn.execute("CREATE SEQUENCE foobar_seq") + if USE_POSTGRES_FOR_TESTS: + txn.execute("CREATE SEQUENCE foobar_seq") + txn.execute( """ CREATE TABLE foobar ( @@ -221,44 +230,27 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): def _insert(txn: LoggingTransaction) -> None: for _ in range(number): + next_val = self.seq_gen.get_next_id_txn(txn) txn.execute( - "INSERT INTO foobar VALUES (nextval('foobar_seq'), ?)", - (instance_name,), + "INSERT INTO foobar (stream_id, instance_name) VALUES (?, ?)", + ( + next_val, + instance_name, + ), ) + txn.execute( """ - INSERT INTO 
stream_positions VALUES ('test_stream', ?, lastval()) - ON CONFLICT (stream_name, instance_name) DO UPDATE SET stream_id = lastval() + INSERT INTO stream_positions VALUES ('test_stream', ?, ?) + ON CONFLICT (stream_name, instance_name) DO UPDATE SET stream_id = ? """, - (instance_name,), + (instance_name, next_val, next_val), ) self.get_success(self.db_pool.runInteraction("_insert_rows", _insert)) - def _insert_row_with_id(self, instance_name: str, stream_id: int) -> None: - """Insert one row as the given instance with given stream_id, updating - the postgres sequence position to match. - """ - - def _insert(txn: LoggingTransaction) -> None: - txn.execute( - "INSERT INTO foobar VALUES (?, ?)", - ( - stream_id, - instance_name, - ), - ) - txn.execute("SELECT setval('foobar_seq', ?)", (stream_id,)) - txn.execute( - """ - INSERT INTO stream_positions VALUES ('test_stream', ?, ?) - ON CONFLICT (stream_name, instance_name) DO UPDATE SET stream_id = ? - """, - (instance_name, stream_id, stream_id), - ) - - self.get_success(self.db_pool.runInteraction("_insert_row_with_id", _insert)) +class MultiWriterIdGeneratorTestCase(MultiWriterIdGeneratorBase): def test_empty(self) -> None: """Test an ID generator against an empty database gives sensible current positions. @@ -347,137 +339,106 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): self.assertEqual(id_gen.get_positions(), {"master": 11}) self.assertEqual(id_gen.get_current_token_for_writer("master"), 11) - def test_multi_instance(self) -> None: - """Test that reads and writes from multiple processes are handled - correctly. 
- """ - self._insert_rows("first", 3) - self._insert_rows("second", 4) + def test_get_next_txn(self) -> None: + """Test that the `get_next_txn` function works correctly.""" - first_id_gen = self._create_id_generator("first", writers=["first", "second"]) - second_id_gen = self._create_id_generator("second", writers=["first", "second"]) + # Prefill table with 7 rows written by 'master' + self._insert_rows("master", 7) - self.assertEqual(first_id_gen.get_positions(), {"first": 3, "second": 7}) - self.assertEqual(first_id_gen.get_current_token_for_writer("first"), 7) - self.assertEqual(first_id_gen.get_current_token_for_writer("second"), 7) + id_gen = self._create_id_generator() - self.assertEqual(second_id_gen.get_positions(), {"first": 3, "second": 7}) - self.assertEqual(second_id_gen.get_current_token_for_writer("first"), 7) - self.assertEqual(second_id_gen.get_current_token_for_writer("second"), 7) + self.assertEqual(id_gen.get_positions(), {"master": 7}) + self.assertEqual(id_gen.get_current_token_for_writer("master"), 7) # Try allocating a new ID gen and check that we only see position # advanced after we leave the context manager. - async def _get_next_async() -> None: - async with first_id_gen.get_next() as stream_id: - self.assertEqual(stream_id, 8) - - self.assertEqual( - first_id_gen.get_positions(), {"first": 3, "second": 7} - ) - self.assertEqual(first_id_gen.get_persisted_upto_position(), 7) - - self.get_success(_get_next_async()) - - self.assertEqual(first_id_gen.get_positions(), {"first": 8, "second": 7}) - - # However the ID gen on the second instance won't have seen the update - self.assertEqual(second_id_gen.get_positions(), {"first": 3, "second": 7}) - - # ... 
but calling `get_next` on the second instance should give a unique - # stream ID + def _get_next_txn(txn: LoggingTransaction) -> None: + stream_id = id_gen.get_next_txn(txn) + self.assertEqual(stream_id, 8) - async def _get_next_async2() -> None: - async with second_id_gen.get_next() as stream_id: - self.assertEqual(stream_id, 9) + self.assertEqual(id_gen.get_positions(), {"master": 7}) + self.assertEqual(id_gen.get_current_token_for_writer("master"), 7) - self.assertEqual( - second_id_gen.get_positions(), {"first": 3, "second": 7} - ) + self.get_success(self.db_pool.runInteraction("test", _get_next_txn)) - self.get_success(_get_next_async2()) + self.assertEqual(id_gen.get_positions(), {"master": 8}) + self.assertEqual(id_gen.get_current_token_for_writer("master"), 8) - self.assertEqual(second_id_gen.get_positions(), {"first": 3, "second": 9}) + def test_restart_during_out_of_order_persistence(self) -> None: + """Test that restarting a process while another process is writing out + of order updates are handled correctly. + """ - # If the second ID gen gets told about the first, it correctly updates - second_id_gen.advance("first", 8) - self.assertEqual(second_id_gen.get_positions(), {"first": 8, "second": 9}) + # Prefill table with 7 rows written by 'master' + self._insert_rows("master", 7) - def test_multi_instance_empty_row(self) -> None: - """Test that reads and writes from multiple processes are handled - correctly, when one of the writers starts without any rows. - """ - # Insert some rows for two out of three of the ID gens. 
- self._insert_rows("first", 3) - self._insert_rows("second", 4) + id_gen = self._create_id_generator() - first_id_gen = self._create_id_generator( - "first", writers=["first", "second", "third"] - ) - second_id_gen = self._create_id_generator( - "second", writers=["first", "second", "third"] - ) - third_id_gen = self._create_id_generator( - "third", writers=["first", "second", "third"] - ) + self.assertEqual(id_gen.get_positions(), {"master": 7}) + self.assertEqual(id_gen.get_current_token_for_writer("master"), 7) - self.assertEqual( - first_id_gen.get_positions(), {"first": 3, "second": 7, "third": 7} - ) - self.assertEqual(first_id_gen.get_current_token_for_writer("first"), 7) - self.assertEqual(first_id_gen.get_current_token_for_writer("second"), 7) - self.assertEqual(first_id_gen.get_current_token_for_writer("third"), 7) + # Persist two rows at once + ctx1 = id_gen.get_next() + ctx2 = id_gen.get_next() - self.assertEqual( - second_id_gen.get_positions(), {"first": 3, "second": 7, "third": 7} - ) - self.assertEqual(second_id_gen.get_current_token_for_writer("first"), 7) - self.assertEqual(second_id_gen.get_current_token_for_writer("second"), 7) - self.assertEqual(second_id_gen.get_current_token_for_writer("third"), 7) + s1 = self.get_success(ctx1.__aenter__()) + s2 = self.get_success(ctx2.__aenter__()) - # Try allocating a new ID gen and check that we only see position - # advanced after we leave the context manager. 
+ self.assertEqual(s1, 8) + self.assertEqual(s2, 9) - async def _get_next_async() -> None: - async with third_id_gen.get_next() as stream_id: - self.assertEqual(stream_id, 8) + self.assertEqual(id_gen.get_positions(), {"master": 7}) + self.assertEqual(id_gen.get_current_token_for_writer("master"), 7) - self.assertEqual( - third_id_gen.get_positions(), {"first": 3, "second": 7, "third": 7} - ) - self.assertEqual(third_id_gen.get_persisted_upto_position(), 7) + # We finish persisting the second row before restart + self.get_success(ctx2.__aexit__(None, None, None)) - self.get_success(_get_next_async()) + # We simulate a restart of another worker by just creating a new ID gen. + id_gen_worker = self._create_id_generator("worker") - self.assertEqual( - third_id_gen.get_positions(), {"first": 3, "second": 7, "third": 8} - ) + # Restarted worker should not see the second persisted row + self.assertEqual(id_gen_worker.get_positions(), {"master": 7}) + self.assertEqual(id_gen_worker.get_current_token_for_writer("master"), 7) - def test_get_next_txn(self) -> None: - """Test that the `get_next_txn` function works correctly.""" + # Now if we persist the first row then both instances should jump ahead + # correctly. + self.get_success(ctx1.__aexit__(None, None, None)) - # Prefill table with 7 rows written by 'master' - self._insert_rows("master", 7) + self.assertEqual(id_gen.get_positions(), {"master": 9}) + id_gen_worker.advance("master", 9) + self.assertEqual(id_gen_worker.get_positions(), {"master": 9}) - id_gen = self._create_id_generator() - self.assertEqual(id_gen.get_positions(), {"master": 7}) - self.assertEqual(id_gen.get_current_token_for_writer("master"), 7) +class WorkerMultiWriterIdGeneratorTestCase(MultiWriterIdGeneratorBase): + if not USE_POSTGRES_FOR_TESTS: + skip = "Requires Postgres" - # Try allocating a new ID gen and check that we only see position - # advanced after we leave the context manager. 
+ def _insert_row_with_id(self, instance_name: str, stream_id: int) -> None: + """Insert one row as the given instance with given stream_id, updating + the postgres sequence position to match. + """ - def _get_next_txn(txn: LoggingTransaction) -> None: - stream_id = id_gen.get_next_txn(txn) - self.assertEqual(stream_id, 8) + def _insert(txn: LoggingTransaction) -> None: + txn.execute( + "INSERT INTO foobar (stream_id, instance_name) VALUES (?, ?)", + ( + stream_id, + instance_name, + ), + ) - self.assertEqual(id_gen.get_positions(), {"master": 7}) - self.assertEqual(id_gen.get_current_token_for_writer("master"), 7) + txn.execute("SELECT setval('foobar_seq', ?)", (stream_id,)) - self.get_success(self.db_pool.runInteraction("test", _get_next_txn)) + txn.execute( + """ + INSERT INTO stream_positions VALUES ('test_stream', ?, ?) + ON CONFLICT (stream_name, instance_name) DO UPDATE SET stream_id = ? + """, + (instance_name, stream_id, stream_id), + ) - self.assertEqual(id_gen.get_positions(), {"master": 8}) - self.assertEqual(id_gen.get_current_token_for_writer("master"), 8) + self.get_success(self.db_pool.runInteraction("_insert_row_with_id", _insert)) def test_get_persisted_upto_position(self) -> None: """Test that `get_persisted_upto_position` correctly tracks updates to @@ -548,49 +509,111 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase): # `persisted_upto_position` in this case, then it will be correct in the # other cases that are tested above (since they'll hit the same code). - def test_restart_during_out_of_order_persistence(self) -> None: - """Test that restarting a process while another process is writing out - of order updates are handled correctly. + def test_multi_instance(self) -> None: + """Test that reads and writes from multiple processes are handled + correctly. 
""" + self._insert_rows("first", 3) + self._insert_rows("second", 4) - # Prefill table with 7 rows written by 'master' - self._insert_rows("master", 7) + first_id_gen = self._create_id_generator("first", writers=["first", "second"]) + second_id_gen = self._create_id_generator("second", writers=["first", "second"]) - id_gen = self._create_id_generator() + self.assertEqual(first_id_gen.get_positions(), {"first": 3, "second": 7}) + self.assertEqual(first_id_gen.get_current_token_for_writer("first"), 7) + self.assertEqual(first_id_gen.get_current_token_for_writer("second"), 7) - self.assertEqual(id_gen.get_positions(), {"master": 7}) - self.assertEqual(id_gen.get_current_token_for_writer("master"), 7) + self.assertEqual(second_id_gen.get_positions(), {"first": 3, "second": 7}) + self.assertEqual(second_id_gen.get_current_token_for_writer("first"), 7) + self.assertEqual(second_id_gen.get_current_token_for_writer("second"), 7) - # Persist two rows at once - ctx1 = id_gen.get_next() - ctx2 = id_gen.get_next() + # Try allocating a new ID gen and check that we only see position + # advanced after we leave the context manager. - s1 = self.get_success(ctx1.__aenter__()) - s2 = self.get_success(ctx2.__aenter__()) + async def _get_next_async() -> None: + async with first_id_gen.get_next() as stream_id: + self.assertEqual(stream_id, 8) - self.assertEqual(s1, 8) - self.assertEqual(s2, 9) + self.assertEqual( + first_id_gen.get_positions(), {"first": 3, "second": 7} + ) + self.assertEqual(first_id_gen.get_persisted_upto_position(), 7) - self.assertEqual(id_gen.get_positions(), {"master": 7}) - self.assertEqual(id_gen.get_current_token_for_writer("master"), 7) + self.get_success(_get_next_async()) - # We finish persisting the second row before restart - self.get_success(ctx2.__aexit__(None, None, None)) + self.assertEqual(first_id_gen.get_positions(), {"first": 8, "second": 7}) - # We simulate a restart of another worker by just creating a new ID gen. 
- id_gen_worker = self._create_id_generator("worker") + # However the ID gen on the second instance won't have seen the update + self.assertEqual(second_id_gen.get_positions(), {"first": 3, "second": 7}) - # Restarted worker should not see the second persisted row - self.assertEqual(id_gen_worker.get_positions(), {"master": 7}) - self.assertEqual(id_gen_worker.get_current_token_for_writer("master"), 7) + # ... but calling `get_next` on the second instance should give a unique + # stream ID - # Now if we persist the first row then both instances should jump ahead - # correctly. - self.get_success(ctx1.__aexit__(None, None, None)) + async def _get_next_async2() -> None: + async with second_id_gen.get_next() as stream_id: + self.assertEqual(stream_id, 9) - self.assertEqual(id_gen.get_positions(), {"master": 9}) - id_gen_worker.advance("master", 9) - self.assertEqual(id_gen_worker.get_positions(), {"master": 9}) + self.assertEqual( + second_id_gen.get_positions(), {"first": 3, "second": 7} + ) + + self.get_success(_get_next_async2()) + + self.assertEqual(second_id_gen.get_positions(), {"first": 3, "second": 9}) + + # If the second ID gen gets told about the first, it correctly updates + second_id_gen.advance("first", 8) + self.assertEqual(second_id_gen.get_positions(), {"first": 8, "second": 9}) + + def test_multi_instance_empty_row(self) -> None: + """Test that reads and writes from multiple processes are handled + correctly, when one of the writers starts without any rows. + """ + # Insert some rows for two out of three of the ID gens. 
+ self._insert_rows("first", 3) + self._insert_rows("second", 4) + + first_id_gen = self._create_id_generator( + "first", writers=["first", "second", "third"] + ) + second_id_gen = self._create_id_generator( + "second", writers=["first", "second", "third"] + ) + third_id_gen = self._create_id_generator( + "third", writers=["first", "second", "third"] + ) + + self.assertEqual( + first_id_gen.get_positions(), {"first": 3, "second": 7, "third": 7} + ) + self.assertEqual(first_id_gen.get_current_token_for_writer("first"), 7) + self.assertEqual(first_id_gen.get_current_token_for_writer("second"), 7) + self.assertEqual(first_id_gen.get_current_token_for_writer("third"), 7) + + self.assertEqual( + second_id_gen.get_positions(), {"first": 3, "second": 7, "third": 7} + ) + self.assertEqual(second_id_gen.get_current_token_for_writer("first"), 7) + self.assertEqual(second_id_gen.get_current_token_for_writer("second"), 7) + self.assertEqual(second_id_gen.get_current_token_for_writer("third"), 7) + + # Try allocating a new ID gen and check that we only see position + # advanced after we leave the context manager. + + async def _get_next_async() -> None: + async with third_id_gen.get_next() as stream_id: + self.assertEqual(stream_id, 8) + + self.assertEqual( + third_id_gen.get_positions(), {"first": 3, "second": 7, "third": 7} + ) + self.assertEqual(third_id_gen.get_persisted_upto_position(), 7) + + self.get_success(_get_next_async()) + + self.assertEqual( + third_id_gen.get_positions(), {"first": 3, "second": 7, "third": 8} + ) def test_writer_config_change(self) -> None: """Test that changing the writer config correctly works.""" |