diff options
37 files changed, 480 insertions, 328 deletions
diff --git a/changelog.d/9975.misc b/changelog.d/9975.misc new file mode 100644 index 0000000000..28b1e40c2b --- /dev/null +++ b/changelog.d/9975.misc @@ -0,0 +1 @@ +Minor enhancements to the `@cachedList` descriptor. diff --git a/changelog.d/9977.misc b/changelog.d/9977.misc new file mode 100644 index 0000000000..093dffc6be --- /dev/null +++ b/changelog.d/9977.misc @@ -0,0 +1 @@ +Split multipart email sending into a dedicated handler. diff --git a/changelog.d/9978.feature b/changelog.d/9978.feature new file mode 100644 index 0000000000..851adb9f6e --- /dev/null +++ b/changelog.d/9978.feature @@ -0,0 +1 @@ +Add a configuration option which allows enabling opentracing by user id. diff --git a/changelog.d/9980.doc b/changelog.d/9980.doc new file mode 100644 index 0000000000..d30ed0601d --- /dev/null +++ b/changelog.d/9980.doc @@ -0,0 +1 @@ +Clarify documentation around SSO mapping providers generating unique IDs and localparts. diff --git a/changelog.d/9981.misc b/changelog.d/9981.misc new file mode 100644 index 0000000000..677c9b4cbd --- /dev/null +++ b/changelog.d/9981.misc @@ -0,0 +1 @@ +Run `black` on files in the `scripts` directory. diff --git a/changelog.d/9984.misc b/changelog.d/9984.misc new file mode 100644 index 0000000000..97bd747f26 --- /dev/null +++ b/changelog.d/9984.misc @@ -0,0 +1 @@ +Simplify a few helper functions. diff --git a/changelog.d/9985.misc b/changelog.d/9985.misc new file mode 100644 index 0000000000..97bd747f26 --- /dev/null +++ b/changelog.d/9985.misc @@ -0,0 +1 @@ +Simplify a few helper functions. diff --git a/changelog.d/9986.misc b/changelog.d/9986.misc new file mode 100644 index 0000000000..97bd747f26 --- /dev/null +++ b/changelog.d/9986.misc @@ -0,0 +1 @@ +Simplify a few helper functions. diff --git a/changelog.d/9987.misc b/changelog.d/9987.misc new file mode 100644 index 0000000000..02c088e3e6 --- /dev/null +++ b/changelog.d/9987.misc @@ -0,0 +1 @@ +Remove unnecessary property from SQLBaseStore. diff --git a/changelog.d/9988.doc b/changelog.d/9988.doc new file mode 100644 index 0000000000..25338c44c3 --- /dev/null +++ b/changelog.d/9988.doc @@ -0,0 +1 @@ +Updates to the PostgreSQL documentation (`postgres.md`). diff --git a/changelog.d/9989.doc b/changelog.d/9989.doc new file mode 100644 index 0000000000..25338c44c3 --- /dev/null +++ b/changelog.d/9989.doc @@ -0,0 +1 @@ +Updates to the PostgreSQL documentation (`postgres.md`). diff --git a/docs/opentracing.md b/docs/opentracing.md index 4c7a56a5d7..f91362f112 100644 --- a/docs/opentracing.md +++ b/docs/opentracing.md @@ -42,17 +42,17 @@ To receive OpenTracing spans, start up a Jaeger server. This can be done using docker like so: ```sh -docker run -d --name jaeger +docker run -d --name jaeger \ -p 6831:6831/udp \ -p 6832:6832/udp \ -p 5778:5778 \ -p 16686:16686 \ -p 14268:14268 \ - jaegertracing/all-in-one:1.13 + jaegertracing/all-in-one:1 ``` Latest documentation is probably at -<https://www.jaegertracing.io/docs/1.13/getting-started/> +https://www.jaegertracing.io/docs/latest/getting-started. ## Enable OpenTracing in Synapse @@ -62,7 +62,7 @@ as shown in the [sample config](./sample_config.yaml). For example: ```yaml opentracing: - tracer_enabled: true + enabled: true homeserver_whitelist: - "mytrustedhomeserver.org" - "*.myotherhomeservers.com" @@ -90,4 +90,4 @@ to two problems, namely: ## Configuring Jaeger Sampling strategies can be set as in this document: -<https://www.jaegertracing.io/docs/1.13/sampling/> +<https://www.jaegertracing.io/docs/latest/sampling/>. diff --git a/docs/postgres.md b/docs/postgres.md index 680685d04e..f83155e52a 100644 --- a/docs/postgres.md +++ b/docs/postgres.md @@ -1,6 +1,6 @@ # Using Postgres -Postgres version 9.5 or later is known to work. +Synapse supports PostgreSQL versions 9.6 or later. ## Install postgres client libraries @@ -33,28 +33,15 @@ Assuming your PostgreSQL database user is called `postgres`, first authenticate # Or, if your system uses sudo to get administrative rights sudo -u postgres bash -Then, create a user ``synapse_user`` with: +Then, create a postgres user and a database with: + # this will prompt for a password for the new user createuser --pwprompt synapse_user -Before you can authenticate with the `synapse_user`, you must create a -database that it can access. To create a database, first connect to the -database with your database user: + createdb --encoding=UTF8 --locale=C --template=template0 --owner=synapse_user synapse - su - postgres # Or: sudo -u postgres bash - psql - -and then run: - - CREATE DATABASE synapse - ENCODING 'UTF8' - LC_COLLATE='C' - LC_CTYPE='C' - template=template0 - OWNER synapse_user; - -This would create an appropriate database named `synapse` owned by the -`synapse_user` user (which must already have been created as above). +The above will create a user called `synapse_user`, and a database called +`synapse`. Note that the PostgreSQL database *must* have the correct encoding set (as shown above), otherwise it will not be able to store UTF8 strings. @@ -63,79 +50,6 @@ You may need to enable password authentication so `synapse_user` can connect to the database. See <https://www.postgresql.org/docs/current/auth-pg-hba-conf.html>. -If you get an error along the lines of `FATAL: Ident authentication failed for -user "synapse_user"`, you may need to use an authentication method other than -`ident`: - -* If the `synapse_user` user has a password, add the password to the `database:` - section of `homeserver.yaml`. Then add the following to `pg_hba.conf`: - - ``` - host synapse synapse_user ::1/128 md5 # or `scram-sha-256` instead of `md5` if you use that - ``` - -* If the `synapse_user` user does not have a password, then a password doesn't - have to be added to `homeserver.yaml`. But the following does need to be added - to `pg_hba.conf`: - - ``` - host synapse synapse_user ::1/128 trust - ``` - -Note that line order matters in `pg_hba.conf`, so make sure that if you do add a -new line, it is inserted before: - -``` -host all all ::1/128 ident -``` - -### Fixing incorrect `COLLATE` or `CTYPE` - -Synapse will refuse to set up a new database if it has the wrong values of -`COLLATE` and `CTYPE` set, and will log warnings on existing databases. Using -different locales can cause issues if the locale library is updated from -underneath the database, or if a different version of the locale is used on any -replicas. - -The safest way to fix the issue is to take a dump and recreate the database with -the correct `COLLATE` and `CTYPE` parameters (as shown above). It is also possible to change the -parameters on a live database and run a `REINDEX` on the entire database, -however extreme care must be taken to avoid database corruption. - -Note that the above may fail with an error about duplicate rows if corruption -has already occurred, and such duplicate rows will need to be manually removed. - - -## Fixing inconsistent sequences error - -Synapse uses Postgres sequences to generate IDs for various tables. A sequence -and associated table can get out of sync if, for example, Synapse has been -downgraded and then upgraded again. - -To fix the issue shut down Synapse (including any and all workers) and run the -SQL command included in the error message. Once done Synapse should start -successfully. - - -## Tuning Postgres - -The default settings should be fine for most deployments. For larger -scale deployments tuning some of the settings is recommended, details of -which can be found at -<https://wiki.postgresql.org/wiki/Tuning_Your_PostgreSQL_Server>. - -In particular, we've found tuning the following values helpful for -performance: - -- `shared_buffers` -- `effective_cache_size` -- `work_mem` -- `maintenance_work_mem` -- `autovacuum_work_mem` - -Note that the appropriate values for those fields depend on the amount -of free memory the database host has available. - ## Synapse config When you are ready to start using PostgreSQL, edit the `database` @@ -165,18 +79,42 @@ may block for an extended period while it waits for a response from the database server. Example values might be: ```yaml -# seconds of inactivity after which TCP should send a keepalive message to the server -keepalives_idle: 10 +database: + args: + # ... as above + + # seconds of inactivity after which TCP should send a keepalive message to the server + keepalives_idle: 10 -# the number of seconds after which a TCP keepalive message that is not -# acknowledged by the server should be retransmitted -keepalives_interval: 10 + # the number of seconds after which a TCP keepalive message that is not + # acknowledged by the server should be retransmitted + keepalives_interval: 10 -# the number of TCP keepalives that can be lost before the client's connection -# to the server is considered dead -keepalives_count: 3 + # the number of TCP keepalives that can be lost before the client's connection + # to the server is considered dead + keepalives_count: 3 ``` +## Tuning Postgres + +The default settings should be fine for most deployments. For larger +scale deployments tuning some of the settings is recommended, details of +which can be found at +<https://wiki.postgresql.org/wiki/Tuning_Your_PostgreSQL_Server>. + +In particular, we've found tuning the following values helpful for +performance: + +- `shared_buffers` +- `effective_cache_size` +- `work_mem` +- `maintenance_work_mem` +- `autovacuum_work_mem` + +Note that the appropriate values for those fields depend on the amount +of free memory the database host has available. + + ## Porting from SQLite ### Overview @@ -185,9 +123,8 @@ The script `synapse_port_db` allows porting an existing synapse server backed by SQLite to using PostgreSQL. This is done in as a two phase process: -1. Copy the existing SQLite database to a separate location (while the - server is down) and running the port script against that offline - database. +1. Copy the existing SQLite database to a separate location and run + the port script against that offline database. 2. Shut down the server. Rerun the port script to port any data that has come in since taking the first snapshot. Restart server against the PostgreSQL database. @@ -245,3 +182,60 @@ PostgreSQL database configuration file `homeserver-postgres.yaml`: ./synctl start Synapse should now be running against PostgreSQL. + + +## Troubleshooting + +### Alternative auth methods + +If you get an error along the lines of `FATAL: Ident authentication failed for +user "synapse_user"`, you may need to use an authentication method other than +`ident`: + +* If the `synapse_user` user has a password, add the password to the `database:` + section of `homeserver.yaml`. Then add the following to `pg_hba.conf`: + + ``` + host synapse synapse_user ::1/128 md5 # or `scram-sha-256` instead of `md5` if you use that + ``` + +* If the `synapse_user` user does not have a password, then a password doesn't + have to be added to `homeserver.yaml`. But the following does need to be added + to `pg_hba.conf`: + + ``` + host synapse synapse_user ::1/128 trust + ``` + +Note that line order matters in `pg_hba.conf`, so make sure that if you do add a +new line, it is inserted before: + +``` +host all all ::1/128 ident +``` + +### Fixing incorrect `COLLATE` or `CTYPE` + +Synapse will refuse to set up a new database if it has the wrong values of +`COLLATE` and `CTYPE` set, and will log warnings on existing databases. Using +different locales can cause issues if the locale library is updated from +underneath the database, or if a different version of the locale is used on any +replicas. + +The safest way to fix the issue is to dump the database and recreate it with +the correct locale parameter (as shown above). It is also possible to change the +parameters on a live database and run a `REINDEX` on the entire database, +however extreme care must be taken to avoid database corruption. + +Note that the above may fail with an error about duplicate rows if corruption +has already occurred, and such duplicate rows will need to be manually removed. + +### Fixing inconsistent sequences error + +Synapse uses Postgres sequences to generate IDs for various tables. A sequence +and associated table can get out of sync if, for example, Synapse has been +downgraded and then upgraded again. + +To fix the issue shut down Synapse (including any and all workers) and run the +SQL command included in the error message. Once done Synapse should start +successfully. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 67ad57b1aa..2952f2ba32 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -2845,7 +2845,8 @@ opentracing: #enabled: true # The list of homeservers we wish to send and receive span contexts and span baggage. - # See docs/opentracing.rst + # See docs/opentracing.rst. + # # This is a list of regexes which are matched against the server_name of the # homeserver. # @@ -2854,19 +2855,26 @@ opentracing: #homeserver_whitelist: # - ".*" + # A list of the matrix IDs of users whose requests will always be traced, + # even if the tracing system would otherwise drop the traces due to + # probabilistic sampling. + # + # By default, the list is empty. + # + #force_tracing_for_users: + # - "@user1:server_name" + # - "@user2:server_name" + # Jaeger can be configured to sample traces at different rates. # All configuration options provided by Jaeger can be set here. - # Jaeger's configuration mostly related to trace sampling which + # Jaeger's configuration is mostly related to trace sampling which # is documented here: - # https://www.jaegertracing.io/docs/1.13/sampling/. + # https://www.jaegertracing.io/docs/latest/sampling/. # #jaeger_config: # sampler: # type: const # param: 1 - - # Logging whether spans were started and reported - # # logging: # false diff --git a/docs/sso_mapping_providers.md b/docs/sso_mapping_providers.md index 50020d1a4a..6db2dc8be5 100644 --- a/docs/sso_mapping_providers.md +++ b/docs/sso_mapping_providers.md @@ -67,8 +67,8 @@ A custom mapping provider must specify the following methods: - Arguments: - `userinfo` - A `authlib.oidc.core.claims.UserInfo` object to extract user information from. - - This method must return a string, which is the unique identifier for the - user. Commonly the ``sub`` claim of the response. + - This method must return a string, which is the unique, immutable identifier + for the user. Commonly the `sub` claim of the response. * `map_user_attributes(self, userinfo, token, failures)` - This method must be async. - Arguments: @@ -87,7 +87,9 @@ A custom mapping provider must specify the following methods: `localpart` value, such as `john.doe1`. - Returns a dictionary with two keys: - `localpart`: A string, used to generate the Matrix ID. If this is - `None`, the user is prompted to pick their own username. + `None`, the user is prompted to pick their own username. This is only used + during a user's first login. Once a localpart has been associated with a + remote user ID (see `get_remote_user_id`) it cannot be updated. - `displayname`: An optional string, the display name for the user. * `get_extra_attributes(self, userinfo, token)` - This method must be async. @@ -153,8 +155,8 @@ A custom mapping provider must specify the following methods: information from. - `client_redirect_url` - A string, the URL that the client will be redirected to. - - This method must return a string, which is the unique identifier for the - user. Commonly the ``uid`` claim of the response. + - This method must return a string, which is the unique, immutable identifier + for the user. Commonly the `uid` claim of the response. * `saml_response_to_user_attributes(self, saml_response, failures, client_redirect_url)` - Arguments: - `saml_response` - A `saml2.response.AuthnResponse` object to extract user @@ -172,8 +174,10 @@ A custom mapping provider must specify the following methods: redirected to. - This method must return a dictionary, which will then be used by Synapse to build a new user. The following keys are allowed: - * `mxid_localpart` - The mxid localpart of the new user. If this is - `None`, the user is prompted to pick their own username. + * `mxid_localpart` - A string, the mxid localpart of the new user. If this is + `None`, the user is prompted to pick their own username. This is only used + during a user's first login. Once a localpart has been associated with a + remote user ID (see `get_remote_user_id`) it cannot be updated. * `displayname` - The displayname of the new user. If not provided, will default to the value of `mxid_localpart`. * `emails` - A list of emails for the new user. If not provided, will diff --git a/scripts-dev/build_debian_packages b/scripts-dev/build_debian_packages index 07d018db99..546724f89f 100755 --- a/scripts-dev/build_debian_packages +++ b/scripts-dev/build_debian_packages @@ -21,18 +21,18 @@ DISTS = ( "debian:buster", "debian:bullseye", "debian:sid", - "ubuntu:bionic", # 18.04 LTS (our EOL forced by Py36 on 2021-12-23) - "ubuntu:focal", # 20.04 LTS (our EOL forced by Py38 on 2024-10-14) - "ubuntu:groovy", # 20.10 (EOL 2021-07-07) + "ubuntu:bionic", # 18.04 LTS (our EOL forced by Py36 on 2021-12-23) + "ubuntu:focal", # 20.04 LTS (our EOL forced by Py38 on 2024-10-14) + "ubuntu:groovy", # 20.10 (EOL 2021-07-07) "ubuntu:hirsute", # 21.04 (EOL 2022-01-05) ) -DESC = '''\ +DESC = """\ Builds .debs for synapse, using a Docker image for the build environment. By default, builds for all known distributions, but a list of distributions can be passed on the commandline for debugging. -''' +""" class Builder(object): @@ -46,7 +46,7 @@ class Builder(object): """Build deb for a single distribution""" if self._failed: - print("not building %s due to earlier failure" % (dist, )) + print("not building %s due to earlier failure" % (dist,)) raise Exception("failed") try: @@ -68,48 +68,65 @@ class Builder(object): # we tend to get source packages which are full of debs. (We could hack # around that with more magic in the build_debian.sh script, but that # doesn't solve the problem for natively-run dpkg-buildpakage). - debsdir = os.path.join(projdir, '../debs') + debsdir = os.path.join(projdir, "../debs") os.makedirs(debsdir, exist_ok=True) if self.redirect_stdout: - logfile = os.path.join(debsdir, "%s.buildlog" % (tag, )) + logfile = os.path.join(debsdir, "%s.buildlog" % (tag,)) print("building %s: directing output to %s" % (dist, logfile)) stdout = open(logfile, "w") else: stdout = None # first build a docker image for the build environment - subprocess.check_call([ - "docker", "build", - "--tag", "dh-venv-builder:" + tag, - "--build-arg", "distro=" + dist, - "-f", "docker/Dockerfile-dhvirtualenv", - "docker", - ], stdout=stdout, stderr=subprocess.STDOUT) + subprocess.check_call( + [ + "docker", + "build", + "--tag", + "dh-venv-builder:" + tag, + "--build-arg", + "distro=" + dist, + "-f", + "docker/Dockerfile-dhvirtualenv", + "docker", + ], + stdout=stdout, + stderr=subprocess.STDOUT, + ) container_name = "synapse_build_" + tag with self._lock: self.active_containers.add(container_name) # then run the build itself - subprocess.check_call([ - "docker", "run", - "--rm", - "--name", container_name, - "--volume=" + projdir + ":/synapse/source:ro", - "--volume=" + debsdir + ":/debs", - "-e", "TARGET_USERID=%i" % (os.getuid(), ), - "-e", "TARGET_GROUPID=%i" % (os.getgid(), ), - "-e", "DEB_BUILD_OPTIONS=%s" % ("nocheck" if skip_tests else ""), - "dh-venv-builder:" + tag, - ], stdout=stdout, stderr=subprocess.STDOUT) + subprocess.check_call( + [ + "docker", + "run", + "--rm", + "--name", + container_name, + "--volume=" + projdir + ":/synapse/source:ro", + "--volume=" + debsdir + ":/debs", + "-e", + "TARGET_USERID=%i" % (os.getuid(),), + "-e", + "TARGET_GROUPID=%i" % (os.getgid(),), + "-e", + "DEB_BUILD_OPTIONS=%s" % ("nocheck" if skip_tests else ""), + "dh-venv-builder:" + tag, + ], + stdout=stdout, + stderr=subprocess.STDOUT, + ) with self._lock: self.active_containers.remove(container_name) if stdout is not None: stdout.close() - print("Completed build of %s" % (dist, )) + print("Completed build of %s" % (dist,)) def kill_containers(self): with self._lock: @@ -117,9 +134,14 @@ class Builder(object): for c in active: print("killing container %s" % (c,)) - subprocess.run([ - "docker", "kill", c, - ], stdout=subprocess.DEVNULL) + subprocess.run( + [ + "docker", + "kill", + c, + ], + stdout=subprocess.DEVNULL, + ) with self._lock: self.active_containers.remove(c) @@ -130,31 +152,38 @@ def run_builds(dists, jobs=1, skip_tests=False): def sig(signum, _frame): print("Caught SIGINT") builder.kill_containers() + signal.signal(signal.SIGINT, sig) with ThreadPoolExecutor(max_workers=jobs) as e: res = e.map(lambda dist: builder.run_build(dist, skip_tests), dists) # make sure we consume the iterable so that exceptions are raised. - for r in res: + for _ in res: pass -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser( description=DESC, ) parser.add_argument( - '-j', '--jobs', type=int, default=1, - help='specify the number of builds to run in parallel', + "-j", + "--jobs", + type=int, + default=1, + help="specify the number of builds to run in parallel", ) parser.add_argument( - '--no-check', action='store_true', - help='skip running tests after building', + "--no-check", + action="store_true", + help="skip running tests after building", ) parser.add_argument( - 'dist', nargs='*', default=DISTS, - help='a list of distributions to build for. Default: %(default)s', + "dist", + nargs="*", + default=DISTS, + help="a list of distributions to build for. Default: %(default)s", ) args = parser.parse_args() run_builds(dists=args.dist, jobs=args.jobs, skip_tests=args.no_check) diff --git a/scripts-dev/lint.sh b/scripts-dev/lint.sh index 9761e97594..869eb2372d 100755 --- a/scripts-dev/lint.sh +++ b/scripts-dev/lint.sh @@ -80,8 +80,22 @@ else # then lint everything! if [[ -z ${files+x} ]]; then # Lint all source code files and directories - # Note: this list aims the mirror the one in tox.ini - files=("synapse" "docker" "tests" "scripts-dev" "scripts" "contrib" "synctl" "setup.py" "synmark" "stubs" ".buildkite") + # Note: this list aims to mirror the one in tox.ini + files=( + "synapse" "docker" "tests" + # annoyingly, black doesn't find these so we have to list them + "scripts/export_signing_key" + "scripts/generate_config" + "scripts/generate_log_config" + "scripts/hash_password" + "scripts/register_new_matrix_user" + "scripts/synapse_port_db" + "scripts-dev" + "scripts-dev/build_debian_packages" + "scripts-dev/sign_json" + "scripts-dev/update_database" + "contrib" "synctl" "setup.py" "synmark" "stubs" ".buildkite" + ) fi fi diff --git a/scripts/export_signing_key b/scripts/export_signing_key index 0ed167ea85..bf0139bd64 100755 --- a/scripts/export_signing_key +++ b/scripts/export_signing_key @@ -30,7 +30,11 @@ def exit(status: int = 0, message: Optional[str] = None): def format_plain(public_key: nacl.signing.VerifyKey): print( "%s:%s %s" - % (public_key.alg, public_key.version, encode_verify_key_base64(public_key),) + % ( + public_key.alg, + public_key.version, + encode_verify_key_base64(public_key), + ) ) @@ -50,7 +54,10 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( - "key_file", nargs="+", type=argparse.FileType("r"), help="The key file to read", + "key_file", + nargs="+", + type=argparse.FileType("r"), + help="The key file to read", ) parser.add_argument( @@ -63,7 +70,7 @@ if __name__ == "__main__": parser.add_argument( "--expiry-ts", type=int, - default=int(time.time() * 1000) + 6*3600000, + default=int(time.time() * 1000) + 6 * 3600000, help=( "The expiry time to use for -x, in milliseconds since 1970. The default " "is (now+6h)." diff --git a/scripts/generate_config b/scripts/generate_config index 771cbf8d95..931b40c045 100755 --- a/scripts/generate_config +++ b/scripts/generate_config @@ -11,23 +11,22 @@ if __name__ == "__main__": parser.add_argument( "--config-dir", default="CONFDIR", - help="The path where the config files are kept. Used to create filenames for " - "things like the log config and the signing key. Default: %(default)s", + "things like the log config and the signing key. Default: %(default)s", ) parser.add_argument( "--data-dir", default="DATADIR", help="The path where the data files are kept. Used to create filenames for " - "things like the database and media store. Default: %(default)s", + "things like the database and media store. Default: %(default)s", ) parser.add_argument( "--server-name", default="SERVERNAME", help="The server name. Used to initialise the server_name config param, but also " - "used in the names of some of the config files. Default: %(default)s", + "used in the names of some of the config files. Default: %(default)s", ) parser.add_argument( @@ -41,21 +40,22 @@ if __name__ == "__main__": "--generate-secrets", action="store_true", help="Enable generation of new secrets for things like the macaroon_secret_key." - "By default, these parameters will be left unset." + "By default, these parameters will be left unset.", ) parser.add_argument( - "-o", "--output-file", - type=argparse.FileType('w'), + "-o", + "--output-file", + type=argparse.FileType("w"), default=sys.stdout, help="File to write the configuration to. Default: stdout", ) parser.add_argument( "--header-file", - type=argparse.FileType('r'), + type=argparse.FileType("r"), help="File from which to read a header, which will be printed before the " - "generated config.", + "generated config.", ) args = parser.parse_args() diff --git a/scripts/hash_password b/scripts/hash_password index a30767f758..1d6fb0d700 100755 --- a/scripts/hash_password +++ b/scripts/hash_password @@ -41,7 +41,7 @@ if __name__ == "__main__": parser.add_argument( "-c", "--config", - type=argparse.FileType('r'), + type=argparse.FileType("r"), help=( "Path to server config file. " "Used to read in bcrypt_rounds and password_pepper." @@ -72,8 +72,8 @@ if __name__ == "__main__": pw = unicodedata.normalize("NFKC", password) hashed = bcrypt.hashpw( - pw.encode('utf8') + password_pepper.encode("utf8"), + pw.encode("utf8") + password_pepper.encode("utf8"), bcrypt.gensalt(bcrypt_rounds), - ).decode('ascii') + ).decode("ascii") print(hashed) diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index 5fb5bb35f7..7c7645c05a 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -294,8 +294,7 @@ class Porter(object): return table, already_ported, total_to_port, forward_chunk, backward_chunk async def get_table_constraints(self) -> Dict[str, Set[str]]: - """Returns a map of tables that have foreign key constraints to tables they depend on. - """ + """Returns a map of tables that have foreign key constraints to tables they depend on.""" def _get_constraints(txn): # We can pull the information about foreign key constraints out from @@ -504,7 +503,9 @@ class Porter(object): return def build_db_store( - self, db_config: DatabaseConnectionConfig, allow_outdated_version: bool = False, + self, + db_config: DatabaseConnectionConfig, + allow_outdated_version: bool = False, ): """Builds and returns a database store using the provided configuration. @@ -740,7 +741,7 @@ class Porter(object): return col outrows = [] - for i, row in enumerate(rows): + for row in rows: try: outrows.append( tuple(conv(j, col) for j, col in enumerate(row) if j > 0) @@ -890,8 +891,7 @@ class Porter(object): await self.postgres_store.db_pool.runInteraction("setup_user_id_seq", r) async def _setup_events_stream_seqs(self) -> None: - """Set the event stream sequences to the correct values. - """ + """Set the event stream sequences to the correct values.""" # We get called before we've ported the events table, so we need to # fetch the current positions from the SQLite store. @@ -920,12 +920,14 @@ class Porter(object): ) await self.postgres_store.db_pool.runInteraction( - "_setup_events_stream_seqs", _setup_events_stream_seqs_set_pos, + "_setup_events_stream_seqs", + _setup_events_stream_seqs_set_pos, ) - async def _setup_sequence(self, sequence_name: str, stream_id_tables: Iterable[str]) -> None: - """Set a sequence to the correct value. - """ + async def _setup_sequence( + self, sequence_name: str, stream_id_tables: Iterable[str] + ) -> None: + """Set a sequence to the correct value.""" current_stream_ids = [] for stream_id_table in stream_id_tables: max_stream_id = await self.sqlite_store.db_pool.simple_select_one_onecol( @@ -939,14 +941,19 @@ class Porter(object): next_id = max(current_stream_ids) + 1 def r(txn): - sql = "ALTER SEQUENCE %s RESTART WITH" % (sequence_name, ) - txn.execute(sql + " %s", (next_id, )) + sql = "ALTER SEQUENCE %s RESTART WITH" % (sequence_name,) + txn.execute(sql + " %s", (next_id,)) - await self.postgres_store.db_pool.runInteraction("_setup_%s" % (sequence_name,), r) + await self.postgres_store.db_pool.runInteraction( + "_setup_%s" % (sequence_name,), r + ) async def _setup_auth_chain_sequence(self) -> None: curr_chain_id = await self.sqlite_store.db_pool.simple_select_one_onecol( - table="event_auth_chains", keyvalues={}, retcol="MAX(chain_id)", allow_none=True + table="event_auth_chains", + keyvalues={}, + retcol="MAX(chain_id)", + allow_none=True, ) def r(txn): @@ -968,8 +975,7 @@ class Porter(object): class Progress(object): - """Used to report progress of the port - """ + """Used to report progress of the port""" def __init__(self): self.tables = {} @@ -994,8 +1000,7 @@ class Progress(object): class CursesProgress(Progress): - """Reports progress to a curses window - """ + """Reports progress to a curses window""" def __init__(self, stdscr): self.stdscr = stdscr @@ -1020,7 +1025,7 @@ class CursesProgress(Progress): self.total_processed = 0 self.total_remaining = 0 - for table, data in self.tables.items(): + for data in self.tables.values(): self.total_processed += data["num_done"] - data["start"] self.total_remaining += data["total"] - data["num_done"] @@ -1111,8 +1116,7 @@ class CursesProgress(Progress): class TerminalProgress(Progress): - """Just prints progress to the terminal - """ + """Just prints progress to the terminal""" def update(self, table, num_done): super(TerminalProgress, self).update(table, num_done) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index efc926d094..458306eba5 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -87,6 +87,7 @@ class Auth: ) self._track_appservice_user_ips = hs.config.track_appservice_user_ips self._macaroon_secret_key = hs.config.macaroon_secret_key + self._force_tracing_for_users = hs.config.tracing.force_tracing_for_users async def check_from_context( self, room_version: str, event, context, do_sig_check=True @@ -208,6 +209,8 @@ class Auth: opentracing.set_tag("authenticated_entity", user_id) opentracing.set_tag("user_id", user_id) opentracing.set_tag("appservice_id", app_service.id) + if user_id in self._force_tracing_for_users: + opentracing.set_tag(opentracing.tags.SAMPLING_PRIORITY, 1) return requester @@ -260,6 +263,8 @@ class Auth: opentracing.set_tag("user_id", user_info.user_id) if device_id: opentracing.set_tag("device_id", device_id) + if user_info.token_owner in self._force_tracing_for_users: + opentracing.set_tag(opentracing.tags.SAMPLING_PRIORITY, 1) return requester except KeyError: diff --git a/synapse/config/registration.py b/synapse/config/registration.py index e6f52b4f40..d9dc55a0c3 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -349,4 +349,4 @@ class RegistrationConfig(Config): def read_arguments(self, args): if args.enable_registration is not None: - self.enable_registration = bool(strtobool(str(args.enable_registration))) + self.enable_registration = strtobool(str(args.enable_registration)) diff --git a/synapse/config/tracer.py b/synapse/config/tracer.py index db22b5b19f..d0ea17261f 100644 --- a/synapse/config/tracer.py +++ b/synapse/config/tracer.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Set + from synapse.python_dependencies import DependencyException, check_requirements from ._base import Config, ConfigError @@ -32,6 +34,8 @@ class TracerConfig(Config): {"sampler": {"type": "const", "param": 1}, "logging": False}, ) + self.force_tracing_for_users: Set[str] = set() + if not self.opentracer_enabled: return @@ -48,6 +52,19 @@ class TracerConfig(Config): if not isinstance(self.opentracer_whitelist, list): raise ConfigError("Tracer homeserver_whitelist config is malformed") + force_tracing_for_users = opentracing_config.get("force_tracing_for_users", []) + if not isinstance(force_tracing_for_users, list): + raise ConfigError( + "Expected a list", ("opentracing", "force_tracing_for_users") + ) + for i, u in enumerate(force_tracing_for_users): + if not isinstance(u, str): + raise ConfigError( + "Expected a string", + ("opentracing", "force_tracing_for_users", f"index {i}"), + ) + self.force_tracing_for_users.add(u) + def generate_config_section(cls, **kwargs): return """\ ## Opentracing ## @@ -64,7 +81,8 @@ class TracerConfig(Config): #enabled: true # The list of homeservers we wish to send and receive span contexts and span baggage. - # See docs/opentracing.rst + # See docs/opentracing.rst. + # # This is a list of regexes which are matched against the server_name of the # homeserver. # @@ -73,19 +91,26 @@ class TracerConfig(Config): #homeserver_whitelist: # - ".*" + # A list of the matrix IDs of users whose requests will always be traced, + # even if the tracing system would otherwise drop the traces due to + # probabilistic sampling. + # + # By default, the list is empty. + # + #force_tracing_for_users: + # - "@user1:server_name" + # - "@user2:server_name" + # Jaeger can be configured to sample traces at different rates. # All configuration options provided by Jaeger can be set here. - # Jaeger's configuration mostly related to trace sampling which + # Jaeger's configuration is mostly related to trace sampling which # is documented here: - # https://www.jaegertracing.io/docs/1.13/sampling/. + # https://www.jaegertracing.io/docs/latest/sampling/. # #jaeger_config: # sampler: # type: const # param: 1 - - # Logging whether spans were started and reported - # # logging: # false """ diff --git a/synapse/handlers/account_validity.py b/synapse/handlers/account_validity.py index 5b927f10b3..d752cf34f0 100644 --- a/synapse/handlers/account_validity.py +++ b/synapse/handlers/account_validity.py @@ -15,12 +15,9 @@ import email.mime.multipart import email.utils import logging -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText from typing import TYPE_CHECKING, List, Optional, Tuple from synapse.api.errors import StoreError, SynapseError -from synapse.logging.context import make_deferred_yieldable from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.types import UserID from synapse.util import stringutils @@ -36,9 +33,11 @@ class AccountValidityHandler: self.hs = hs self.config = hs.config self.store = self.hs.get_datastore() - self.sendmail = self.hs.get_sendmail() + self.send_email_handler = self.hs.get_send_email_handler() self.clock = self.hs.get_clock() + self._app_name = self.hs.config.email_app_name + self._account_validity_enabled = ( hs.config.account_validity.account_validity_enabled ) @@ -63,23 +62,10 @@ class AccountValidityHandler: self._template_text = ( hs.config.account_validity.account_validity_template_text ) - account_validity_renew_email_subject = ( + self._renew_email_subject = ( hs.config.account_validity.account_validity_renew_email_subject ) - try: - app_name = hs.config.email_app_name - - self._subject = account_validity_renew_email_subject % {"app": app_name} - - self._from_string = hs.config.email_notif_from % {"app": app_name} - except Exception: - # If substitution failed, fall back to the bare strings. - self._subject = account_validity_renew_email_subject - self._from_string = hs.config.email_notif_from - - self._raw_from = email.utils.parseaddr(self._from_string)[1] - # Check the renewal emails to send and send them every 30min. if hs.config.run_background_tasks: self.clock.looping_call(self._send_renewal_emails, 30 * 60 * 1000) @@ -159,38 +145,17 @@ class AccountValidityHandler: } html_text = self._template_html.render(**template_vars) - html_part = MIMEText(html_text, "html", "utf8") - plain_text = self._template_text.render(**template_vars) - text_part = MIMEText(plain_text, "plain", "utf8") for address in addresses: raw_to = email.utils.parseaddr(address)[1] - multipart_msg = MIMEMultipart("alternative") - multipart_msg["Subject"] = self._subject - multipart_msg["From"] = self._from_string - multipart_msg["To"] = address - multipart_msg["Date"] = email.utils.formatdate() - multipart_msg["Message-ID"] = email.utils.make_msgid() - multipart_msg.attach(text_part) - multipart_msg.attach(html_part) - - logger.info("Sending renewal email to %s", address) - - await make_deferred_yieldable( - self.sendmail( - self.hs.config.email_smtp_host, - self._raw_from, - raw_to, - multipart_msg.as_string().encode("utf8"), - reactor=self.hs.get_reactor(), - port=self.hs.config.email_smtp_port, - requireAuthentication=self.hs.config.email_smtp_user is not None, - username=self.hs.config.email_smtp_user, - password=self.hs.config.email_smtp_pass, - requireTransportSecurity=self.hs.config.require_transport_security, - ) + await self.send_email_handler.send_email( + email_address=raw_to, + subject=self._renew_email_subject, + app_name=self._app_name, + html=html_text, + text=plain_text, ) await self.store.set_renewal_mail_status(user_id=user_id, email_sent=True) diff --git a/synapse/handlers/send_email.py b/synapse/handlers/send_email.py new file mode 100644 index 0000000000..e9f6aef06f --- /dev/null +++ b/synapse/handlers/send_email.py @@ -0,0 +1,98 @@ +# Copyright 2021 The Matrix.org C.I.C. Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import email.utils +import logging +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText +from typing import TYPE_CHECKING + +from synapse.logging.context import make_deferred_yieldable + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +class SendEmailHandler: + def __init__(self, hs: "HomeServer"): + self.hs = hs + + self._sendmail = hs.get_sendmail() + self._reactor = hs.get_reactor() + + self._from = hs.config.email.email_notif_from + self._smtp_host = hs.config.email.email_smtp_host + self._smtp_port = hs.config.email.email_smtp_port + self._smtp_user = hs.config.email.email_smtp_user + self._smtp_pass = hs.config.email.email_smtp_pass + self._require_transport_security = hs.config.email.require_transport_security + + async def send_email( + self, + email_address: str, + subject: str, + app_name: str, + html: str, + text: str, + ) -> None: + """Send a multipart email with the given information. + + Args: + email_address: The address to send the email to. + subject: The email's subject. + app_name: The app name to include in the From header. + html: The HTML content to include in the email. + text: The plain text content to include in the email. + """ + try: + from_string = self._from % {"app": app_name} + except (KeyError, TypeError): + from_string = self._from + + raw_from = email.utils.parseaddr(from_string)[1] + raw_to = email.utils.parseaddr(email_address)[1] + + if raw_to == "": + raise RuntimeError("Invalid 'to' address") + + html_part = MIMEText(html, "html", "utf8") + text_part = MIMEText(text, "plain", "utf8") + + multipart_msg = MIMEMultipart("alternative") + multipart_msg["Subject"] = subject + multipart_msg["From"] = from_string + multipart_msg["To"] = email_address + multipart_msg["Date"] = email.utils.formatdate() + multipart_msg["Message-ID"] = email.utils.make_msgid() + multipart_msg.attach(text_part) + multipart_msg.attach(html_part) + + logger.info("Sending email to %s" % email_address) + + await make_deferred_yieldable( + self._sendmail( + self._smtp_host, + raw_from, + raw_to, + multipart_msg.as_string().encode("utf8"), + reactor=self._reactor, + port=self._smtp_port, + requireAuthentication=self._smtp_user is not None, + username=self._smtp_user, + password=self._smtp_pass, + requireTransportSecurity=self._require_transport_security, + ) + ) diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index c4b43b0d3f..5f9ea5003a 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -12,12 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import email.mime.multipart -import email.utils import logging import urllib.parse -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, TypeVar import bleach @@ -27,7 +23,6 @@ from synapse.api.constants import EventTypes, Membership from synapse.api.errors import StoreError from synapse.config.emailconfig import EmailSubjectConfig from synapse.events import EventBase -from synapse.logging.context import make_deferred_yieldable from synapse.push.presentable_names import ( calculate_room_name, descriptor_from_member_events, @@ -108,7 +103,7 @@ class Mailer: self.template_html = template_html self.template_text = template_text - self.sendmail = self.hs.get_sendmail() + self.send_email_handler = hs.get_send_email_handler() self.store = self.hs.get_datastore() self.state_store = self.hs.get_storage().state self.macaroon_gen = self.hs.get_macaroon_generator() @@ -310,17 +305,6 @@ class Mailer: self, email_address: str, subject: str, extra_template_vars: Dict[str, Any] ) -> None: """Send an email with the given information and template text""" - try: - from_string = self.hs.config.email_notif_from % {"app": self.app_name} - except TypeError: - from_string = self.hs.config.email_notif_from - - raw_from = email.utils.parseaddr(from_string)[1] - raw_to = email.utils.parseaddr(email_address)[1] - - if raw_to == "": - raise RuntimeError("Invalid 'to' address") - template_vars = { "app_name": self.app_name, "server_name": self.hs.config.server.server_name, @@ -329,35 +313,14 @@ class Mailer: template_vars.update(extra_template_vars) html_text = self.template_html.render(**template_vars) - html_part = MIMEText(html_text, "html", "utf8") - plain_text = self.template_text.render(**template_vars) - text_part = MIMEText(plain_text, "plain", "utf8") - - multipart_msg = MIMEMultipart("alternative") - multipart_msg["Subject"] = subject - multipart_msg["From"] = from_string - multipart_msg["To"] = email_address - multipart_msg["Date"] = email.utils.formatdate() - multipart_msg["Message-ID"] = email.utils.make_msgid() - multipart_msg.attach(text_part) - multipart_msg.attach(html_part) - - logger.info("Sending email to %s" % email_address) - - await make_deferred_yieldable( - self.sendmail( - self.hs.config.email_smtp_host, - raw_from, - raw_to, - multipart_msg.as_string().encode("utf8"), - reactor=self.hs.get_reactor(), - port=self.hs.config.email_smtp_port, - requireAuthentication=self.hs.config.email_smtp_user is not None, - username=self.hs.config.email_smtp_user, - password=self.hs.config.email_smtp_pass, - requireTransportSecurity=self.hs.config.require_transport_security, - ) + + await self.send_email_handler.send_email( + email_address=email_address, + subject=subject, + app_name=self.app_name, + html=html_text, + text=plain_text, ) async def _get_room_vars( diff --git a/synapse/server.py b/synapse/server.py index 2337d2d9b4..fec0024c89 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -104,6 +104,7 @@ from synapse.handlers.room_list import RoomListHandler from synapse.handlers.room_member import RoomMemberHandler, RoomMemberMasterHandler from synapse.handlers.room_member_worker import RoomMemberWorkerHandler from synapse.handlers.search import SearchHandler +from synapse.handlers.send_email import SendEmailHandler from synapse.handlers.set_password import SetPasswordHandler from synapse.handlers.space_summary import SpaceSummaryHandler from synapse.handlers.sso import SsoHandler @@ -550,6 +551,10 @@ class HomeServer(metaclass=abc.ABCMeta): return SearchHandler(self) @cache_in_self + def get_send_email_handler(self) -> SendEmailHandler: + return SendEmailHandler(self) + + @cache_in_self def get_set_password_handler(self) -> SetPasswordHandler: return SetPasswordHandler(self) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 3d98d3f5f8..0623da9aa1 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -import random from abc import ABCMeta from typing import TYPE_CHECKING, Any, Collection, Iterable, Optional, Union @@ -44,7 +43,6 @@ class SQLBaseStore(metaclass=ABCMeta): self._clock = hs.get_clock() self.database_engine = database.engine self.db_pool = database - self.rand = random.SystemRandom() def process_replication_rows( self, diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index c9346de316..a1f98b7e38 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -665,7 +665,7 @@ class DeviceWorkerStore(SQLBaseStore): cached_method_name="get_device_list_last_stream_id_for_remote", list_name="user_ids", ) - async def get_device_list_last_stream_id_for_remotes(self, user_ids: str): + async def get_device_list_last_stream_id_for_remotes(self, user_ids: Iterable[str]): rows = await self.db_pool.simple_select_many_batch( table="device_lists_remote_extremeties", column="user_id", diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 398d6b6acb..9ba5778a88 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -473,7 +473,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore): num_args=1, ) async def _get_bare_e2e_cross_signing_keys_bulk( - self, user_ids: List[str] + self, user_ids: Iterable[str] ) -> Dict[str, Dict[str, dict]]: """Returns the cross-signing keys for a set of users. The output of this function should be passed to _get_e2e_cross_signing_signatures_txn if @@ -497,7 +497,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore): def _get_bare_e2e_cross_signing_keys_bulk_txn( self, txn: Connection, - user_ids: List[str], + user_ids: Iterable[str], ) -> Dict[str, Dict[str, dict]]: """Returns the cross-signing keys for a set of users. The output of this function should be passed to _get_e2e_cross_signing_signatures_txn if diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 6e5ee557d2..e5c5cf8ff0 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +import random import re from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union @@ -997,7 +998,7 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): expiration_ts = now_ms + self._account_validity_period if use_delta: - expiration_ts = self.rand.randrange( + expiration_ts = random.randrange( expiration_ts - self._account_validity_startup_job_max_delta, expiration_ts, ) diff --git a/synapse/storage/databases/main/user_erasure_store.py b/synapse/storage/databases/main/user_erasure_store.py index acf6b2fb64..1ecdd40c38 100644 --- a/synapse/storage/databases/main/user_erasure_store.py +++ b/synapse/storage/databases/main/user_erasure_store.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Dict, Iterable + from synapse.storage._base import SQLBaseStore from synapse.util.caches.descriptors import cached, cachedList @@ -37,21 +39,16 @@ class UserErasureWorkerStore(SQLBaseStore): return bool(result) @cachedList(cached_method_name="is_user_erased", list_name="user_ids") - async def are_users_erased(self, user_ids): + async def are_users_erased(self, user_ids: Iterable[str]) -> Dict[str, bool]: """ Checks which users in a list have requested erasure Args: - user_ids (iterable[str]): full user id to check + user_ids: full user ids to check Returns: - dict[str, bool]: - for each user, whether the user has requested erasure. + for each user, whether the user has requested erasure. """ - # this serves the dual purpose of (a) making sure we can do len and - # iterate it multiple times, and (b) avoiding duplicates. - user_ids = tuple(set(user_ids)) - rows = await self.db_pool.simple_select_many_batch( table="erased_users", column="user_id", diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index ac4a078b26..3a4d027095 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -322,8 +322,8 @@ class DeferredCacheDescriptor(_CacheDescriptorBase): class DeferredCacheListDescriptor(_CacheDescriptorBase): """Wraps an existing cache to support bulk fetching of keys. - Given a list of keys it looks in the cache to find any hits, then passes - the list of missing keys to the wrapped function. + Given an iterable of keys it looks in the cache to find any hits, then passes + the tuple of missing keys to the wrapped function. Once wrapped, the function returns a Deferred which resolves to the list of results. @@ -437,7 +437,9 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase): return f args_to_call = dict(arg_dict) - args_to_call[self.list_name] = list(missing) + # copy the missing set before sending it to the callee, to guard against + # modification. + args_to_call[self.list_name] = tuple(missing) cached_defers.append( defer.maybeDeferred( @@ -522,14 +524,14 @@ def cachedList( Used to do batch lookups for an already created cache. A single argument is specified as a list that is iterated through to lookup keys in the - original cache. A new list consisting of the keys that weren't in the cache - get passed to the original function, the result of which is stored in the + original cache. A new tuple consisting of the (deduplicated) keys that weren't in + the cache gets passed to the original function, the result of which is stored in the cache. Args: cached_method_name: The name of the single-item lookup method. This is only used to find the cache to use. - list_name: The name of the argument that is the list to use to + list_name: The name of the argument that is the iterable to use to do batch lookups in the cache. num_args: Number of arguments to use as the key in the cache (including list_name). Defaults to all named parameters. diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py index 4f25cd1d26..f029432191 100644 --- a/synapse/util/stringutils.py +++ b/synapse/util/stringutils.py @@ -13,8 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. import itertools -import random import re +import secrets import string from collections.abc import Iterable from typing import Optional, Tuple @@ -35,26 +35,27 @@ CLIENT_SECRET_REGEX = re.compile(r"^[0-9a-zA-Z\.=_\-]+$") # MXC_REGEX = re.compile("^mxc://([^/]+)/([^/#?]+)$") -# random_string and random_string_with_symbols are used for a range of things, -# some cryptographically important, some less so. We use SystemRandom to make sure -# we get cryptographically-secure randoms. -rand = random.SystemRandom() - def random_string(length: int) -> str: - return "".join(rand.choice(string.ascii_letters) for _ in range(length)) + """Generate a cryptographically secure string of random letters. + + Drawn from the characters: `a-z` and `A-Z` + """ + return "".join(secrets.choice(string.ascii_letters) for _ in range(length)) def random_string_with_symbols(length: int) -> str: - return "".join(rand.choice(_string_with_symbols) for _ in range(length)) + """Generate a cryptographically secure string of random letters/numbers/symbols. + + Drawn from the characters: `a-z`, `A-Z`, `0-9`, and `.,;:^&*-_+=#~@` + """ + return "".join(secrets.choice(_string_with_symbols) for _ in range(length)) def is_ascii(s: bytes) -> bool: try: s.decode("ascii").encode("ascii") - except UnicodeDecodeError: - return False - except UnicodeEncodeError: + except UnicodeError: return False return True diff --git a/tests/util/caches/test_descriptors.py b/tests/util/caches/test_descriptors.py index 178ac8a68c..bbbc276697 100644 --- a/tests/util/caches/test_descriptors.py +++ b/tests/util/caches/test_descriptors.py @@ -666,18 +666,20 @@ class CachedListDescriptorTestCase(unittest.TestCase): with LoggingContext("c1") as c1: obj = Cls() obj.mock.return_value = {10: "fish", 20: "chips"} + + # start the lookup off d1 = obj.list_fn([10, 20], 2) self.assertEqual(current_context(), SENTINEL_CONTEXT) r = yield d1 self.assertEqual(current_context(), c1) - obj.mock.assert_called_once_with([10, 20], 2) + obj.mock.assert_called_once_with((10, 20), 2) self.assertEqual(r, {10: "fish", 20: "chips"}) obj.mock.reset_mock() # a call with different params should call the mock again obj.mock.return_value = {30: "peas"} r = yield obj.list_fn([20, 30], 2) - obj.mock.assert_called_once_with([30], 2) + obj.mock.assert_called_once_with((30,), 2) self.assertEqual(r, {20: "chips", 30: "peas"}) obj.mock.reset_mock() @@ -692,6 +694,15 @@ class CachedListDescriptorTestCase(unittest.TestCase): obj.mock.assert_not_called() self.assertEqual(r, {10: "fish", 20: "chips", 30: "peas"}) + # we should also be able to use a (single-use) iterable, and should + # deduplicate the keys + obj.mock.reset_mock() + obj.mock.return_value = {40: "gravy"} + iterable = (x for x in [10, 40, 40]) + r = yield obj.list_fn(iterable, 2) + obj.mock.assert_called_once_with((40,), 2) + self.assertEqual(r, {10: "fish", 40: "gravy"}) + @defer.inlineCallbacks def test_invalidate(self): """Make sure that invalidation callbacks are called.""" @@ -717,7 +728,7 @@ class CachedListDescriptorTestCase(unittest.TestCase): # cache miss obj.mock.return_value = {10: "fish", 20: "chips"} r1 = yield obj.list_fn([10, 20], 2, on_invalidate=invalidate0) - obj.mock.assert_called_once_with([10, 20], 2) + obj.mock.assert_called_once_with((10, 20), 2) self.assertEqual(r1, {10: "fish", 20: "chips"}) obj.mock.reset_mock() diff --git a/tox.ini b/tox.ini index ecd609271d..da77d124fc 100644 --- a/tox.ini +++ b/tox.ini @@ -34,7 +34,17 @@ lint_targets = synapse tests scripts + # annoyingly, black doesn't find these so we have to list them + scripts/export_signing_key + scripts/generate_config + scripts/generate_log_config + scripts/hash_password + scripts/register_new_matrix_user + scripts/synapse_port_db scripts-dev + scripts-dev/build_debian_packages + scripts-dev/sign_json + scripts-dev/update_database stubs contrib synctl |