synapse/http/site.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372

# Copyright 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import logging
import time

from twisted.web.server import Request, Site

from synapse.http import redact_uri
from synapse.http.request_metrics import RequestMetrics, requests_counter
from synapse.logging.context import LoggingContext, PreserveLoggingContext

logger = logging.getLogger(__name__)

_next_request_seq = 0


class SynapseRequest(Request):
    """Class which encapsulates an HTTP request to synapse.

    All of the requests processed in synapse are of this type.

    It extends twisted's twisted.web.server.Request, and adds:
     * Unique request ID
     * A log context associated with the request
     * Redaction of access_token query-params in __repr__
     * Logging at start and end
     * Metrics to record CPU, wallclock and DB time by endpoint.

    It also provides a method `processing`, which returns a context manager. If this
    method is called, the request won't be logged until the context manager is closed;
    this is useful for asynchronous request handlers which may go on processing the
    request even after the client has disconnected.

    Attributes:
        logcontext(LoggingContext) : the log context for this request
    """

    def __init__(self, site, channel, *args, **kw):
        Request.__init__(self, channel, *args, **kw)
        self.site = site
        self._channel = channel  # this is used by the tests
        self.authenticated_entity = None
        self.start_time = 0

        # we can't yet create the logcontext, as we don't know the method.
        self.logcontext = None

        global _next_request_seq
        self.request_seq = _next_request_seq
        _next_request_seq += 1

        # whether an asynchronous request handler has called processing()
        self._is_processing = False

        # the time when the asynchronous request handler completed its processing
        self._processing_finished_time = None

        # what time we finished sending the response to the client (or the connection
        # dropped)
        self.finish_time = None

    def __repr__(self):
        # We overwrite this so that we don't log ``access_token``
        return "<%s at 0x%x method=%r uri=%r clientproto=%r site=%r>" % (
            self.__class__.__name__,
            id(self),
            self.get_method(),
            self.get_redacted_uri(),
            self.clientproto.decode("ascii", errors="replace"),
            self.site.site_tag,
        )

    def get_request_id(self):
        return "%s-%i" % (self.get_method(), self.request_seq)

    def get_redacted_uri(self):
        uri = self.uri
        if isinstance(uri, bytes):
            uri = self.uri.decode("ascii")
        return redact_uri(uri)

    def get_method(self):
        """Gets the method associated with the request (or placeholder if not
        method has yet been received).

        Note: This is necessary as the placeholder value in twisted is str
        rather than bytes, so we need to sanitise `self.method`.

        Returns:
            str
        """
        method = self.method
        if isinstance(method, bytes):
            method = self.method.decode("ascii")
        return method

    def get_user_agent(self):
        return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1]

    def render(self, resrc):
        # this is called once a Resource has been found to serve the request; in our
        # case the Resource in question will normally be a JsonResource.

        # create a LogContext for this request
        request_id = self.get_request_id()
        logcontext = self.logcontext = LoggingContext(request_id)
        logcontext.request = request_id

        # override the Server header which is set by twisted
        self.setHeader("Server", self.site.server_version_string)

        with PreserveLoggingContext(self.logcontext):
            # we start the request metrics timer here with an initial stab
            # at the servlet name. For most requests that name will be
            # JsonResource (or a subclass), and JsonResource._async_render
            # will update it once it picks a servlet.
            servlet_name = resrc.__class__.__name__
            self._started_processing(servlet_name)

            Request.render(self, resrc)

            # record the arrival of the request *after*
            # dispatching to the handler, so that the handler
            # can update the servlet name in the request
            # metrics
            requests_counter.labels(self.get_method(), self.request_metrics.name).inc()

    @contextlib.contextmanager
    def processing(self):
        """Record the fact that we are processing this request.

        Returns a context manager; the correct way to use this is:

        @defer.inlineCallbacks
        def handle_request(request):
            with request.processing("FooServlet"):
                yield really_handle_the_request()

        Once the context manager is closed, the completion of the request will be logged,
        and the various metrics will be updated.
        """
        if self._is_processing:
            raise RuntimeError("Request is already processing")
        self._is_processing = True

        try:
            yield
        except Exception:
            # this should already have been caught, and sent back to the client as a 500.
            logger.exception("Asynchronous messge handler raised an uncaught exception")
        finally:
            # the request handler has finished its work and either sent the whole response
            # back, or handed over responsibility to a Producer.

            self._processing_finished_time = time.time()
            self._is_processing = False

            # if we've already sent the response, log it now; otherwise, we wait for the
            # response to be sent.
            if self.finish_time is not None:
                self._finished_processing()

    def finish(self):
        """Called when all response data has been written to this Request.

        Overrides twisted.web.server.Request.finish to record the finish time and do
        logging.
        """
        self.finish_time = time.time()
        Request.finish(self)
        if not self._is_processing:
            with PreserveLoggingContext(self.logcontext):
                self._finished_processing()

    def connectionLost(self, reason):
        """Called when the client connection is closed before the response is written.

        Overrides twisted.web.server.Request.connectionLost to record the finish time and
        do logging.
        """
        self.finish_time = time.time()
        Request.connectionLost(self, reason)

        # we only get here if the connection to the client drops before we send
        # the response.
        #
        # It's useful to log it here so that we can get an idea of when
        # the client disconnects.
        with PreserveLoggingContext(self.logcontext):
            logger.warning(
                "Error processing request %r: %s %s", self, reason.type, reason.value
            )

            if not self._is_processing:
                self._finished_processing()

    def _started_processing(self, servlet_name):
        """Record the fact that we are processing this request.

        This will log the request's arrival. Once the request completes,
        be sure to call finished_processing.

        Args:
            servlet_name (str): the name of the servlet which will be
                processing this request. This is used in the metrics.

                It is possible to update this afterwards by updating
                self.request_metrics.name.
        """
        self.start_time = time.time()
        self.request_metrics = RequestMetrics()
        self.request_metrics.start(
            self.start_time, name=servlet_name, method=self.get_method()
        )

        self.site.access_logger.info(
            "%s - %s - Received request: %s %s",
            self.getClientIP(),
            self.site.site_tag,
            self.get_method(),
            self.get_redacted_uri(),
        )

    def _finished_processing(self):
        """Log the completion of this request and update the metrics
        """

        if self.logcontext is None:
            # this can happen if the connection closed before we read the
            # headers (so render was never called). In that case we'll already
            # have logged a warning, so just bail out.
            return

        usage = self.logcontext.get_resource_usage()

        if self._processing_finished_time is None:
            # we completed the request without anything calling processing()
            self._processing_finished_time = time.time()

        # the time between receiving the request and the request handler finishing
        processing_time = self._processing_finished_time - self.start_time

        # the time between the request handler finishing and the response being sent
        # to the client (nb may be negative)
        response_send_time = self.finish_time - self._processing_finished_time

        # need to decode as it could be raw utf-8 bytes
        # from a IDN servname in an auth header
        authenticated_entity = self.authenticated_entity
        if authenticated_entity is not None and isinstance(authenticated_entity, bytes):
            authenticated_entity = authenticated_entity.decode("utf-8", "replace")

        # ...or could be raw utf-8 bytes in the User-Agent header.
        # N.B. if you don't do this, the logger explodes cryptically
        # with maximum recursion trying to log errors about
        # the charset problem.
        # c.f. https://github.com/matrix-org/synapse/issues/3471
        user_agent = self.get_user_agent()
        if user_agent is not None:
            user_agent = user_agent.decode("utf-8", "replace")
        else:
            user_agent = "-"

        code = str(self.code)
        if not self.finished:
            # we didn't send the full response before we gave up (presumably because
            # the connection dropped)
            code += "!"

        self.site.access_logger.info(
            "%s - %s - {%s}"
            " Processed request: %.3fsec/%.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)"
            ' %sB %s "%s %s %s" "%s" [%d dbevts]',
            self.getClientIP(),
            self.site.site_tag,
            authenticated_entity,
            processing_time,
            response_send_time,
            usage.ru_utime,
            usage.ru_stime,
            usage.db_sched_duration_sec,
            usage.db_txn_duration_sec,
            int(usage.db_txn_count),
            self.sentLength,
            code,
            self.get_method(),
            self.get_redacted_uri(),
            self.clientproto.decode("ascii", errors="replace"),
            user_agent,
            usage.evt_db_fetch_count,
        )

        try:
            self.request_metrics.stop(self.finish_time, self.code, self.sentLength)
        except Exception as e:
            logger.warning("Failed to stop metrics: %r", e)


class XForwardedForRequest(SynapseRequest):
    def __init__(self, *args, **kw):
        SynapseRequest.__init__(self, *args, **kw)

    """
    Add a layer on top of another request that only uses the value of an
    X-Forwarded-For header as the result of C{getClientIP}.
    """

    def getClientIP(self):
        """
        @return: The client address (the first address) in the value of the
            I{X-Forwarded-For header}.  If the header is not present, return
            C{b"-"}.
        """
        return (
            self.requestHeaders.getRawHeaders(b"x-forwarded-for", [b"-"])[0]
            .split(b",")[0]
            .strip()
            .decode("ascii")
        )


class SynapseRequestFactory(object):
    def __init__(self, site, x_forwarded_for):
        self.site = site
        self.x_forwarded_for = x_forwarded_for

    def __call__(self, *args, **kwargs):
        if self.x_forwarded_for:
            return XForwardedForRequest(self.site, *args, **kwargs)
        else:
            return SynapseRequest(self.site, *args, **kwargs)


class SynapseSite(Site):
    """
    Subclass of a twisted http Site that does access logging with python's
    standard logging
    """

    def __init__(
        self,
        logger_name,
        site_tag,
        config,
        resource,
        server_version_string,
        *args,
        **kwargs
    ):
        Site.__init__(self, resource, *args, **kwargs)

        self.site_tag = site_tag

        proxied = config.get("x_forwarded", False)
        self.requestFactory = SynapseRequestFactory(self, proxied)
        self.access_logger = logging.getLogger(logger_name)
        self.server_version_string = server_version_string.encode("ascii")

    def log(self, request):
        pass