diff --git a/changelog.d/14303.misc b/changelog.d/14303.misc
new file mode 100644
index 0000000000..24ce238223
--- /dev/null
+++ b/changelog.d/14303.misc
@@ -0,0 +1 @@
+Improve type hinting of `RawHeaders`.
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index 2a9f039367..cb5892f041 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -178,13 +178,13 @@ class KeyUploadServlet(RestServlet):
# Proxy headers from the original request, such as the auth headers
# (in case the access token is there) and the original IP /
# User-Agent of the request.
- headers = {
- header: request.requestHeaders.getRawHeaders(header, [])
+ headers: Dict[bytes, List[bytes]] = {
+ header: list(request.requestHeaders.getRawHeaders(header, []))
for header in (b"Authorization", b"User-Agent")
}
# Add the previous hop to the X-Forwarded-For header.
- x_forwarded_for = request.requestHeaders.getRawHeaders(
- b"X-Forwarded-For", []
+ x_forwarded_for = list(
+ request.requestHeaders.getRawHeaders(b"X-Forwarded-For", [])
)
# we use request.client here, since we want the previous hop, not the
# original client (as returned by request.getClientAddress()).
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 084d0a5b84..4eb740c040 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -25,7 +25,6 @@ from typing import (
List,
Mapping,
Optional,
- Sequence,
Tuple,
Union,
)
@@ -90,14 +89,29 @@ incoming_responses_counter = Counter(
"synapse_http_client_responses", "", ["method", "code"]
)
-# the type of the headers list, to be passed to the t.w.h.Headers.
-# Actually we can mix str and bytes keys, but Mapping treats 'key' as invariant so
-# we simplify.
+# the type of the headers map, to be passed to the t.w.h.Headers.
+#
+# The actual type accepted by Twisted is
+# Mapping[Union[str, bytes], Sequence[Union[str, bytes]] ,
+# allowing us to mix and match str and bytes freely. However: any str is also a
+# Sequence[str]; passing a header string value which is a
+# standalone str is interpreted as a sequence of 1-codepoint strings. This is a disastrous footgun.
+# We use a narrower value type (RawHeaderValue) to avoid this footgun.
+#
+# We also simplify the keys to be either all str or all bytes. This helps because
+# Dict[K, V] is invariant in K (and indeed V).
RawHeaders = Union[Mapping[str, "RawHeaderValue"], Mapping[bytes, "RawHeaderValue"]]
# the value actually has to be a List, but List is invariant so we can't specify that
# the entries can either be Lists or bytes.
-RawHeaderValue = Sequence[Union[str, bytes]]
+RawHeaderValue = Union[
+ List[str],
+ List[bytes],
+ List[Union[str, bytes]],
+ Tuple[str, ...],
+ Tuple[bytes, ...],
+ Tuple[Union[str, bytes], ...],
+]
def check_against_blacklist(
|