diff options
author | Jason Little <realtyem@gmail.com> | 2023-03-08 20:49:49 -0600 |
---|---|---|
committer | Jason Little <realtyem@gmail.com> | 2023-03-08 20:49:49 -0600 |
commit | e2fecab98807658d786a88ddeaa1a6e6361208f4 (patch) | |
tree | a123660a9991067585d097aee849cd28f33cb0dd | |
parent | Merge branch 'develop' into comp-worker-shorthand (diff) | |
download | synapse-e2fecab98807658d786a88ddeaa1a6e6361208f4.tar.xz |
Try and apply review comments about sanitizing names.
1. Update a few comments for clarity. 2. Apply a replace() to remove whitespace in a requested worker name and use an underscore instead. 3. Stop-error if requesting a worker name, but forgetting to actually put one in. 4. If a worker name isn't requested, just use the set of worker types, sorted and concatenated with a hyphen. This avoids having to sanitize the worker name in this case. 5. Add regex to avoid nasty name surprises.
-rwxr-xr-x | docker/configure_workers_and_start.py | 83 |
1 files changed, 49 insertions, 34 deletions
diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index f2312ea04b..4993615ff1 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -47,6 +47,7 @@ import os import platform +import re import subprocess import sys from collections import defaultdict @@ -630,14 +631,26 @@ def parse_worker_types( "To many worker names requested for a single worker, or to many " f"'='. Please fix: {worker_type_string}" ) - # if there was no name given, this will still be an empty string - requested_worker_name = worker_type_split[0] + # Assign the name + # Hopefully, no one will actually use whitespace in a requested worker name, + # but if they do, replace it with an underscore to maintain readability. + requested_worker_name = worker_type_split[0].replace(" ", "_") + + # If there was no name given, this will still be an empty string. If we got + # here, it's because there was a name requested with a '='. They must have + # forgotten to actually type in a name. e.g. '=type' + if not requested_worker_name: + error( + "Worker name requested but not actually found at: " + f"'{worker_type_string}'. Please fix." + ) # Check the last character of a requested name is not a number. This can # cause an error that comes across during startup as an exception in # 'startListening' and ends with 'Address already in use' for the port. # This only seems to occur when requesting more than 10 of a given - # worker_type, otherwise it would be ok. + # worker_type, otherwise it would be ok. Check this separately from the + # regex below, as otherwise digits are ok in a name if requested_worker_name[-1].isdigit(): error( "Found a number at the end of the requested worker name: " @@ -646,19 +659,30 @@ def parse_worker_types( "underscore after the number if this what you really want to do." ) + # Uncommon mistake that will cause problems. Name string containing any of a + # range of nasties will do Bad Things to filenames and probably nginx too. + if not re.match(r"^[a-zA-Z0-9_+-]*[a-zA-Z0-9_+-]$", requested_worker_name): + error( + "Requesting a worker name containing an invalid character is not " + "allowed, as it would raise a FileNotFoundError. Please only use " + "a-z, A-Z, 0-9, or '_', '+', '-' when forming your name: " + f"{requested_worker_name}" + ) + # Reassign the worker_type string with no name on it. new_worker_type_string = worker_type_split[1] # At this point, we have: - # requested_worker_name which might be an empty string - # new_worker_type_string which might still be what it was when it came in + # requested_worker_name which might be an empty string or a requested name + # new_worker_type_string which needs to be split into it's worker_type parts # Split the worker_type_string on "+", remove whitespace from ends then make # the list a set so it's deduplicated. worker_types_list = split_and_strip_string(new_worker_type_string, "+") worker_types_set: Set[str] = set(worker_types_list) - worker_base_name = new_worker_type_string + # Settle on a base name for this worker + worker_base_name: str if requested_worker_name: worker_base_name = requested_worker_name # It'll be useful to have this in the log in case it's a complex of many @@ -670,37 +694,26 @@ def parse_worker_types( ) else: - # The worker name will be the worker_type, however if spaces exist - # between concatenated worker_types and the "+" because of readability, - # it will error on startup. Recombine worker_types without spaces and log. - # Allows for human readability while declaring a complex worker type, e.g. + # The worker name will be the worker_type(s), however the potential for + # whitespace could still exist if we use the new_worker_type_string from + # above. Use the set of worker_type(s), sort it alphabetically(for + # determinism) and concatenate with a "-" instead of a "+" as it came in + # with. # 'event_persister + federation_reader + federation_sender + pusher' - if (len(worker_types_set) > 1) and (" " in worker_base_name): - worker_base_name = "+".join(sorted(worker_types_set)) - log( - "Default worker name would have contained spaces, which is not " - f"allowed: '{worker_type_string}'. Reformed name to not contain " - f"spaces: '{worker_base_name}'" - ) + # would become + # 'event_persister-federation_reader-federation_sender-pusher' + # This is later checked for sanity when pulling from WORKERS_CONFIG. + worker_base_name = "-".join(sorted(worker_types_set)) + log( + "Default worker name might have contained spaces, which is not " + f"allowed: '{new_worker_type_string}'. Reformed name to not contain " + f"spaces and be sorted alphabetically by type: '{worker_base_name}'" + ) # At this point, we have: - # worker_base_name which might be identical to - # new_worker_type_string which might still be what it was when it came in + # worker_base_name and # worker_types_set which is a Set of what worker_types are requested - # Uncommon mistake that will cause problems. Name string containing quotes - # or spaces will do Bad Things to filenames and probably nginx too. - if ( - (" " in worker_base_name) - or ('"' in worker_base_name) - or ("'" in worker_base_name) - ): - error( - "Requesting a worker name containing a quote mark or a space is " - "not allowed, as it would raise a FileNotFoundError. Please fix: " - f"{worker_base_name}" - ) - # This counter is used for naming workers with an incrementing number. Use the # worker_base_name for the index worker_type_counter[worker_base_name] += 1 @@ -714,11 +727,13 @@ def parse_worker_types( # deterministic. deterministic_worker_type_string = "+".join(sorted(worker_types_set)) check_worker_type = worker_name_checklist.get(worker_base_name) - # Either this doesn't exist yet, or it matches with a twin + # Either this doesn't exist yet, which means it's being seen for the first time, + # or it matches with a previously seen worker_type(s) combination. if (check_worker_type is None) or ( check_worker_type == deterministic_worker_type_string ): - # This is a no-op if it exists, which is expected to avoid the else block + # This is either setting the base name for the first time, or a no-op if it + # exists(e.g. 'event_persister:2'). worker_name_checklist.setdefault( worker_base_name, deterministic_worker_type_string ) |