author     Patrick Cloke <patrickc@matrix.org>   2023-09-22 14:24:31 -0400
committer  Patrick Cloke <patrickc@matrix.org>   2023-09-22 14:25:23 -0400
commit     be042ce2c646324b0434bf422613c5bbf4d10f76 (patch)
tree       373df401ae22f027046f1da82092e7ae013d6e39
parent     Merge remote-tracking branch 'origin/develop' into clokep/psycopg3 (diff)
download   synapse-be042ce2c646324b0434bf422613c5bbf4d10f76.tar.xz
Separate engines via subclassing.
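
PostgresEngine becomes an abstract base class holding the behaviour shared by
both PostgreSQL drivers, while the driver-specific pieces move into two
subclasses: Psycopg2Engine (psycopg2) and the existing PsycopgEngine (psycopg,
i.e. version 3). Each subclass supplies its own isolation-level map, default
isolation level, OperationalError type, server-version lookup and
autocommit/isolation handling, and create_engine() dispatches on the `name`
key of the database config ("psycopg2" vs "psycopg").

A rough, self-contained sketch of the resulting shape (stand-in connection and
cursor types, not the real driver classes or the full engine API):

    import abc
    from typing import Any, Generic, Mapping, TypeVar

    ConnectionType = TypeVar("ConnectionType")
    CursorType = TypeVar("CursorType")

    class BaseDatabaseEngine(Generic[ConnectionType, CursorType]):
        def __init__(self, module: Any, database_config: Mapping[str, Any]) -> None:
            self.module = module
            self.config = database_config

    class PostgresEngine(
        BaseDatabaseEngine[ConnectionType, CursorType], metaclass=abc.ABCMeta
    ):
        # Driver-agnostic behaviour (locale checks, connection setup, feature
        # flags, ...) lives here; subclasses fill in the parts that differ.
        @abc.abstractmethod
        def get_server_version(self, db_conn: ConnectionType) -> int:
            ...

    class Psycopg2Engine(PostgresEngine[Any, Any]):  # really psycopg2 connection/cursor
        def get_server_version(self, db_conn: Any) -> int:
            return db_conn.server_version  # psycopg2 exposes it on the connection

    class PsycopgEngine(PostgresEngine[Any, Any]):  # really psycopg.Connection/Cursor
        def get_server_version(self, db_conn: Any) -> int:
            return db_conn.info.server_version  # psycopg 3 exposes it on ConnectionInfo
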
-rw-r--r--  synapse/storage/engines/__init__.py    7
-rw-r--r--  synapse/storage/engines/postgres.py   80
-rw-r--r--  synapse/storage/engines/psycopg.py   174
-rw-r--r--  synapse/storage/engines/psycopg2.py   92
4 files changed, 130 insertions, 223 deletions
diff --git a/synapse/storage/engines/__init__.py b/synapse/storage/engines/__init__.py
index 5e821787a0..53465b274a 100644
--- a/synapse/storage/engines/__init__.py
+++ b/synapse/storage/engines/__init__.py
@@ -14,6 +14,7 @@
 from typing import Any, Mapping, NoReturn, cast
 
 from ._base import BaseDatabaseEngine, IncorrectDatabaseSetup
+from .postgres import PostgresEngine
 
 
 # The classes `PostgresEngine` and `Sqlite3Engine` must always be importable, because
@@ -32,9 +33,9 @@ def dummy_engine(name: str, module: str) -> BaseDatabaseEngine:
 
 
 try:
-    from .postgres import PostgresEngine
+    from .psycopg2 import Psycopg2Engine
 except ImportError:
-    PostgresEngine = dummy_engine("PostgresEngine", "psycopg2")  # type: ignore[misc,assignment]
+    Psycopg2Engine = dummy_engine("Psycopg2Engine", "psycopg2")  # type: ignore[misc,assignment]
 
 try:
     from .psycopg import PsycopgEngine
@@ -54,7 +55,7 @@ def create_engine(database_config: Mapping[str, Any]) -> BaseDatabaseEngine:
         return Sqlite3Engine(database_config)
 
     if name == "psycopg2":
-        return PostgresEngine(database_config)
+        return Psycopg2Engine(database_config)
 
     if name == "psycopg":
         return PsycopgEngine(database_config)
diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index 9a8146c72c..ec143181fa 100644
--- a/synapse/storage/engines/postgres.py
+++ b/synapse/storage/engines/postgres.py
@@ -12,17 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import abc
 import logging
-from typing import TYPE_CHECKING, Any, Mapping, NoReturn, Optional, Tuple, cast
+from typing import TYPE_CHECKING, Any, Mapping, Optional, Tuple, Type, cast
 
-import psycopg2.extensions
+from psycopg import sql
 
 from synapse.storage.engines._base import (
     BaseDatabaseEngine,
+    ConnectionType,
+    CursorType,
     IncorrectDatabaseSetup,
-    IsolationLevel,
 )
-from synapse.storage.types import Cursor
+from synapse.storage.types import Cursor, DBAPI2Module
 
 if TYPE_CHECKING:
     from synapse.storage.database import LoggingDatabaseConnection
@@ -31,19 +33,14 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
-class PostgresEngine(
-    BaseDatabaseEngine[psycopg2.extensions.connection, psycopg2.extensions.cursor]
-):
-    def __init__(self, database_config: Mapping[str, Any]):
-        super().__init__(psycopg2, database_config)
-        psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
+class PostgresEngine(BaseDatabaseEngine[ConnectionType, CursorType], metaclass=abc.ABCMeta):
+    isolation_level_map: Mapping[int, int]
+    default_isolation_level: int
+    OperationalError: Type[Exception]
 
-        # Disables passing `bytes` to txn.execute, c.f. #6186. If you do
-        # actually want to use bytes than wrap it in `bytearray`.
-        def _disable_bytes_adapter(_: bytes) -> NoReturn:
-            raise Exception("Passing bytes to DB is disabled.")
+    def __init__(self, module: DBAPI2Module, database_config: Mapping[str, Any]):
+        super().__init__(module, database_config)
 
-        psycopg2.extensions.register_adapter(bytes, _disable_bytes_adapter)
         self.synchronous_commit: bool = database_config.get("synchronous_commit", True)
         # Set the statement timeout to 1 hour by default.
         # Any query taking more than 1 hour should probably be considered a bug;
@@ -56,16 +53,15 @@ class PostgresEngine(
         )
         self._version: Optional[int] = None  # unknown as yet
 
-        self.isolation_level_map: Mapping[int, int] = {
-            IsolationLevel.READ_COMMITTED: psycopg2.extensions.ISOLATION_LEVEL_READ_COMMITTED,
-            IsolationLevel.REPEATABLE_READ: psycopg2.extensions.ISOLATION_LEVEL_REPEATABLE_READ,
-            IsolationLevel.SERIALIZABLE: psycopg2.extensions.ISOLATION_LEVEL_SERIALIZABLE,
-        }
-        self.default_isolation_level = (
-            psycopg2.extensions.ISOLATION_LEVEL_REPEATABLE_READ
-        )
         self.config = database_config
 
+    @abc.abstractmethod
+    def get_server_version(self, db_conn: ConnectionType) -> int:
+        """Gets called when setting up a brand new database. This allows us to
+        apply stricter checks on new databases versus existing database.
+        """
+        ...
+
     @property
     def single_threaded(self) -> bool:
         return False
@@ -79,14 +75,14 @@ class PostgresEngine(
 
     def check_database(
         self,
-        db_conn: psycopg2.extensions.connection,
+        db_conn: ConnectionType,
         allow_outdated_version: bool = False,
     ) -> None:
         # Get the version of PostgreSQL that we're using. As per the psycopg2
         # docs: The number is formed by converting the major, minor, and
         # revision numbers into two-decimal-digit numbers and appending them
         # together. For example, version 8.1.5 will be returned as 80105
-        self._version = db_conn.server_version
+        self._version = self.get_server_version(db_conn)
         allow_unsafe_locale = self.config.get("allow_unsafe_locale", False)
 
         # Are we on a supported PostgreSQL version?
@@ -154,7 +150,7 @@ class PostgresEngine(
         return sql.replace("?", "%s")
 
     def on_new_connection(self, db_conn: "LoggingDatabaseConnection") -> None:
-        db_conn.set_isolation_level(self.default_isolation_level)
+        self.attempt_to_set_isolation_level(db_conn.conn, self.default_isolation_level)
 
         # Set the bytea output to escape, vs the default of hex
         cursor = db_conn.cursor()
@@ -168,7 +164,8 @@ class PostgresEngine(
 
         # Abort really long-running statements and turn them into errors.
         if self.statement_timeout is not None:
-            cursor.execute("SET statement_timeout TO ?", (self.statement_timeout,))
+            cursor.execute(sql.SQL("SET statement_timeout TO {}").format(sql.Literal(self.statement_timeout)))
+            # cursor.execute("SELECT set_config('statement_timeout', ?, false)", (self.statement_timeout,))
 
         cursor.close()
         db_conn.commit()
@@ -193,15 +190,7 @@ class PostgresEngine(
         """Do we support the `CREATE SEQUENCE` clause?"""
         return True
 
-    def is_deadlock(self, error: Exception) -> bool:
-        if isinstance(error, psycopg2.DatabaseError):
-            # https://www.postgresql.org/docs/current/static/errcodes-appendix.html
-            # "40001" serialization_failure
-            # "40P01" deadlock_detected
-            return error.pgcode in ["40001", "40P01"]
-        return False
-
-    def is_connection_closed(self, conn: psycopg2.extensions.connection) -> bool:
+    def is_connection_closed(self, conn: ConnectionType) -> bool:
         return bool(conn.closed)
 
     def lock_table(self, txn: Cursor, table: str) -> None:
@@ -225,25 +214,8 @@ class PostgresEngine(
     def row_id_name(self) -> str:
         return "ctid"
 
-    def in_transaction(self, conn: psycopg2.extensions.connection) -> bool:
-        return conn.status != psycopg2.extensions.STATUS_READY
-
-    def attempt_to_set_autocommit(
-        self, conn: psycopg2.extensions.connection, autocommit: bool
-    ) -> None:
-        return conn.set_session(autocommit=autocommit)
-
-    def attempt_to_set_isolation_level(
-        self, conn: psycopg2.extensions.connection, isolation_level: Optional[int]
-    ) -> None:
-        if isolation_level is None:
-            isolation_level = self.default_isolation_level
-        else:
-            isolation_level = self.isolation_level_map[isolation_level]
-        return conn.set_isolation_level(isolation_level)
-
     @staticmethod
-    def executescript(cursor: psycopg2.extensions.cursor, script: str) -> None:
+    def executescript(cursor: CursorType, script: str) -> None:
         """Execute a chunk of SQL containing multiple semicolon-delimited statements.
 
         Psycopg2 seems happy to do this in DBAPI2's `execute()` function.
diff --git a/synapse/storage/engines/psycopg.py b/synapse/storage/engines/psycopg.py
index 5ca7e5618e..e9cc02e795 100644
--- a/synapse/storage/engines/psycopg.py
+++ b/synapse/storage/engines/psycopg.py
@@ -13,27 +13,27 @@
 # limitations under the License.
 
 import logging
-from typing import TYPE_CHECKING, Any, Mapping, Optional, Tuple, cast
+from typing import TYPE_CHECKING, Any, Mapping, Optional
 
 import psycopg
 import psycopg.errors
 import psycopg.sql
 
+from synapse.storage.engines import PostgresEngine
 from synapse.storage.engines._base import (
-    BaseDatabaseEngine,
-    IncorrectDatabaseSetup,
     IsolationLevel,
 )
-from synapse.storage.types import Cursor
 
 if TYPE_CHECKING:
-    from synapse.storage.database import LoggingDatabaseConnection
+    pass
 
 
 logger = logging.getLogger(__name__)
 
 
-class PsycopgEngine(BaseDatabaseEngine[psycopg.Connection, psycopg.Cursor]):
+class PsycopgEngine(PostgresEngine[psycopg.Connection, psycopg.Cursor]):
+    OperationalError = psycopg.OperationalError
+
     def __init__(self, database_config: Mapping[str, Any]):
         super().__init__(psycopg, database_config)
         # psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
@@ -43,101 +43,15 @@ class PsycopgEngine(BaseDatabaseEngine[psycopg.Connection, psycopg.Cursor]):
         # def _disable_bytes_adapter(_: bytes) -> NoReturn:
         #     raise Exception("Passing bytes to DB is disabled.")
 
-        # psycopg2.extensions.register_adapter(bytes, _disable_bytes_adapter)
-        self.synchronous_commit: bool = database_config.get("synchronous_commit", True)
-        self._version: Optional[int] = None  # unknown as yet
-
         self.isolation_level_map: Mapping[int, psycopg.IsolationLevel] = {
             IsolationLevel.READ_COMMITTED: psycopg.IsolationLevel.READ_COMMITTED,
             IsolationLevel.REPEATABLE_READ: psycopg.IsolationLevel.REPEATABLE_READ,
             IsolationLevel.SERIALIZABLE: psycopg.IsolationLevel.SERIALIZABLE,
         }
         self.default_isolation_level = psycopg.IsolationLevel.REPEATABLE_READ
-        self.config = database_config
-
-    @property
-    def single_threaded(self) -> bool:
-        return False
 
-    def get_db_locale(self, txn: Cursor) -> Tuple[str, str]:
-        txn.execute(
-            "SELECT datcollate, datctype FROM pg_database WHERE datname = current_database()"
-        )
-        collation, ctype = cast(Tuple[str, str], txn.fetchone())
-        return collation, ctype
-
-    def check_database(
-        self,
-        db_conn: psycopg.Connection,
-        allow_outdated_version: bool = False,
-    ) -> None:
-        # Get the version of PostgreSQL that we're using. As per the psycopg
-        # docs: The number is formed by converting the major, minor, and
-        # revision numbers into two-decimal-digit numbers and appending them
-        # together. For example, version 8.1.5 will be returned as 80105
-        self._version = db_conn.info.server_version
-        allow_unsafe_locale = self.config.get("allow_unsafe_locale", False)
-
-        # Are we on a supported PostgreSQL version?
-        if not allow_outdated_version and self._version < 100000:
-            raise RuntimeError("Synapse requires PostgreSQL 10 or above.")
-
-        with db_conn.cursor() as txn:
-            txn.execute("SHOW SERVER_ENCODING")
-            rows = txn.fetchall()
-            if rows and rows[0][0] != "UTF8":
-                raise IncorrectDatabaseSetup(
-                    "Database has incorrect encoding: '%s' instead of 'UTF8'\n"
-                    "See docs/postgres.md for more information." % (rows[0][0],)
-                )
-
-            collation, ctype = self.get_db_locale(txn)
-            if collation != "C":
-                logger.warning(
-                    "Database has incorrect collation of %r. Should be 'C'",
-                    collation,
-                )
-                if not allow_unsafe_locale:
-                    raise IncorrectDatabaseSetup(
-                        "Database has incorrect collation of %r. Should be 'C'\n"
-                        "See docs/postgres.md for more information. You can override this check by"
-                        "setting 'allow_unsafe_locale' to true in the database config.",
-                        collation,
-                    )
-
-            if ctype != "C":
-                if not allow_unsafe_locale:
-                    logger.warning(
-                        "Database has incorrect ctype of %r. Should be 'C'",
-                        ctype,
-                    )
-                    raise IncorrectDatabaseSetup(
-                        "Database has incorrect ctype of %r. Should be 'C'\n"
-                        "See docs/postgres.md for more information. You can override this check by"
-                        "setting 'allow_unsafe_locale' to true in the database config.",
-                        ctype,
-                    )
-
-    def check_new_database(self, txn: Cursor) -> None:
-        """Gets called when setting up a brand new database. This allows us to
-        apply stricter checks on new databases versus existing database.
-        """
-
-        collation, ctype = self.get_db_locale(txn)
-
-        errors = []
-
-        if collation != "C":
-            errors.append("    - 'COLLATE' is set to %r. Should be 'C'" % (collation,))
-
-        if ctype != "C":
-            errors.append("    - 'CTYPE' is set to %r. Should be 'C'" % (ctype,))
-
-        if errors:
-            raise IncorrectDatabaseSetup(
-                "Database is incorrectly configured:\n\n%s\n\n"
-                "See docs/postgres.md for more information." % ("\n".join(errors))
-            )
+    def get_server_version(self, db_conn: psycopg.Connection) -> int:
+        return db_conn.info.server_version
 
     def convert_param_style(self, sql: str) -> str:
         if isinstance(sql, psycopg.sql.Composed):
@@ -145,42 +59,6 @@ class PsycopgEngine(BaseDatabaseEngine[psycopg.Connection, psycopg.Cursor]):
 
         return sql.replace("?", "%s")
 
-    def on_new_connection(self, db_conn: "LoggingDatabaseConnection") -> None:
-        db_conn.set_isolation_level(self.default_isolation_level)
-
-        # Set the bytea output to escape, vs the default of hex
-        cursor = db_conn.cursor()
-        cursor.execute("SET bytea_output TO escape")
-
-        # Asynchronous commit, don't wait for the server to call fsync before
-        # ending the transaction.
-        # https://www.postgresql.org/docs/current/static/wal-async-commit.html
-        if not self.synchronous_commit:
-            cursor.execute("SET synchronous_commit TO OFF")
-
-        cursor.close()
-        db_conn.commit()
-
-    @property
-    def supports_using_any_list(self) -> bool:
-        """Do we support using `a = ANY(?)` and passing a list"""
-        return True
-
-    @property
-    def supports_returning(self) -> bool:
-        """Do we support the `RETURNING` clause in insert/update/delete?"""
-        return True
-
-    @property
-    def supports_select_distinct_on(self) -> bool:
-        """Do we support the `DISTINCT ON` clause in SELECT?"""
-        return True
-
-    @property
-    def supports_sequences(self) -> bool:
-        """Do we support the `CREATE SEQUENCE` clause?"""
-        return True
-
     def is_deadlock(self, error: Exception) -> bool:
         if isinstance(error, psycopg.errors.Error):
             # https://www.postgresql.org/docs/current/static/errcodes-appendix.html
@@ -189,30 +67,6 @@ class PsycopgEngine(BaseDatabaseEngine[psycopg.Connection, psycopg.Cursor]):
             return error.sqlstate in ["40001", "40P01"]
         return False
 
-    def is_connection_closed(self, conn: psycopg.Connection) -> bool:
-        return bool(conn.closed)
-
-    def lock_table(self, txn: Cursor, table: str) -> None:
-        txn.execute("LOCK TABLE %s in EXCLUSIVE MODE" % (table,))
-
-    @property
-    def server_version(self) -> str:
-        """Returns a string giving the server version. For example: '8.1.5'."""
-        # note that this is a bit of a hack because it relies on check_database
-        # having been called. Still, that should be a safe bet here.
-        numver = self._version
-        assert numver is not None
-
-        # https://www.postgresql.org/docs/current/libpq-status.html#LIBPQ-PQSERVERVERSION
-        if numver >= 100000:
-            return "%i.%i" % (numver / 10000, numver % 10000)
-        else:
-            return "%i.%i.%i" % (numver / 10000, (numver % 10000) / 100, numver % 100)
-
-    @property
-    def row_id_name(self) -> str:
-        return "ctid"
-
     def in_transaction(self, conn: psycopg.Connection) -> bool:
         return conn.info.transaction_status != psycopg.pq.TransactionStatus.IDLE
 
@@ -229,15 +83,3 @@ class PsycopgEngine(BaseDatabaseEngine[psycopg.Connection, psycopg.Cursor]):
         else:
             pg_isolation_level = self.isolation_level_map[isolation_level]
         conn.isolation_level = pg_isolation_level
-
-    @staticmethod
-    def executescript(cursor: psycopg.Cursor, script: str) -> None:
-        """Execute a chunk of SQL containing multiple semicolon-delimited statements.
-
-        Psycopg seems happy to do this in DBAPI2's `execute()` function.
-
-        For consistency with SQLite, any ongoing transaction is committed before
-        executing the script in its own transaction. The script transaction is
-        left open and it is the responsibility of the caller to commit it.
-        """
-        cursor.execute(f"COMMIT; BEGIN TRANSACTION; {script}")
diff --git a/synapse/storage/engines/psycopg2.py b/synapse/storage/engines/psycopg2.py
new file mode 100644
index 0000000000..78b7a4c51f
--- /dev/null
+++ b/synapse/storage/engines/psycopg2.py
@@ -0,0 +1,92 @@
+# Copyright 2015, 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import TYPE_CHECKING, Any, Mapping, Optional
+
+import psycopg2.extensions
+
+from synapse.storage.engines import PostgresEngine
+from synapse.storage.engines._base import (
+    IsolationLevel,
+)
+
+if TYPE_CHECKING:
+    pass
+
+
+logger = logging.getLogger(__name__)
+
+
+class Psycopg2Engine(
+    PostgresEngine[psycopg2.extensions.connection, psycopg2.extensions.cursor]
+):
+    OperationalError = psycopg2.OperationalError
+
+    def __init__(self, database_config: Mapping[str, Any]):
+        super().__init__(psycopg2, database_config)
+        psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
+
+        self.isolation_level_map: Mapping[int, int] = {
+            IsolationLevel.READ_COMMITTED: psycopg2.extensions.ISOLATION_LEVEL_READ_COMMITTED,
+            IsolationLevel.REPEATABLE_READ: psycopg2.extensions.ISOLATION_LEVEL_REPEATABLE_READ,
+            IsolationLevel.SERIALIZABLE: psycopg2.extensions.ISOLATION_LEVEL_SERIALIZABLE,
+        }
+        self.default_isolation_level = (
+            psycopg2.extensions.ISOLATION_LEVEL_REPEATABLE_READ
+        )
+        self.config = database_config
+
+    def get_server_version(self, db_conn: psycopg2.extensions.connection) -> int:
+        return db_conn.server_version
+
+    def convert_param_style(self, sql: str) -> str:
+        return sql.replace("?", "%s")
+
+    def is_deadlock(self, error: Exception) -> bool:
+        if isinstance(error, psycopg2.DatabaseError):
+            # https://www.postgresql.org/docs/current/static/errcodes-appendix.html
+            # "40001" serialization_failure
+            # "40P01" deadlock_detected
+            return error.pgcode in ["40001", "40P01"]
+        return False
+
+    def in_transaction(self, conn: psycopg2.extensions.connection) -> bool:
+        return conn.status != psycopg2.extensions.STATUS_READY
+
+    def attempt_to_set_autocommit(
+        self, conn: psycopg2.extensions.connection, autocommit: bool
+    ) -> None:
+        return conn.set_session(autocommit=autocommit)
+
+    def attempt_to_set_isolation_level(
+        self, conn: psycopg2.extensions.connection, isolation_level: Optional[int]
+    ) -> None:
+        if isolation_level is None:
+            isolation_level = self.default_isolation_level
+        else:
+            isolation_level = self.isolation_level_map[isolation_level]
+        return conn.set_isolation_level(isolation_level)
+
+    @staticmethod
+    def executescript(cursor: psycopg2.extensions.cursor, script: str) -> None:
+        """Execute a chunk of SQL containing multiple semicolon-delimited statements.
+
+        Psycopg2 seems happy to do this in DBAPI2's `execute()` function.
+
+        For consistency with SQLite, any ongoing transaction is committed before
+        executing the script in its own transaction. The script transaction is
+        left open and it is the responsibility of the caller to commit it.
+        """
+        cursor.execute(f"COMMIT; BEGIN TRANSACTION; {script}")