summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--CHANGES.rst9
-rw-r--r--UPGRADE.rst9
-rw-r--r--contrib/prometheus/consoles/synapse.html20
-rw-r--r--contrib/prometheus/synapse-v1.rules6
-rw-r--r--contrib/prometheus/synapse-v2.rules12
-rw-r--r--docs/metrics-howto.rst11
-rw-r--r--synapse/handlers/room_member.py8
-rw-r--r--synapse/http/endpoint.py103
-rw-r--r--synapse/http/server.py26
-rw-r--r--synapse/util/metrics.py25
-rw-r--r--tests/test_dns.py29
11 files changed, 89 insertions, 169 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index 5fbad54427..38372381ac 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,12 +1,3 @@
-Changes in synapse v0.28.0 (2018-xx-xx)
-=======================================
-
-As previously advised, this release removes a number of redundant Prometheus
-metrics. Administrators may need to update their dashboards and alerting rules
-to use the updated metric names, if they have not already done so. See
-`docs/metrics-howto.rst <docs/metrics-howto.rst#deprecated-metrics-removed-in-0-28-0>`_
-for more details.
-
 Changes in synapse v0.27.2 (2018-03-26)
 =======================================
 
diff --git a/UPGRADE.rst b/UPGRADE.rst
index 39a16b1c0c..f6bb1070b1 100644
--- a/UPGRADE.rst
+++ b/UPGRADE.rst
@@ -52,7 +52,7 @@ Upgrading to $NEXT_VERSION
 ====================
 
 This release expands the anonymous usage stats sent if the opt-in
-``report_stats`` configuration is set to ``true``. We now capture RSS memory
+``report_stats`` configuration is set to ``true``. We now capture RSS memory 
 and cpu use at a very coarse level. This requires administrators to install
 the optional ``psutil`` python module.
 
@@ -60,13 +60,6 @@ We would appreciate it if you could assist by ensuring this module is available
 and ``report_stats`` is enabled. This will let us see if performance changes to
 synapse are having an impact to the general community.
 
-This release also removes a number of redundant Prometheus metrics.
-Administrators may need to update their dashboards and alerting rules to use
-the updated metric names, if they have not already done so. See
-`docs/metrics-howto.rst <docs/metrics-howto.rst#deprecated-metrics-removed-in-0-28-0>`_
-for more details.
-
-
 Upgrading to v0.15.0
 ====================
 
diff --git a/contrib/prometheus/consoles/synapse.html b/contrib/prometheus/consoles/synapse.html
index e23d8a1fce..69aa87f85e 100644
--- a/contrib/prometheus/consoles/synapse.html
+++ b/contrib/prometheus/consoles/synapse.html
@@ -202,11 +202,11 @@ new PromConsole.Graph({
 <h1>Requests</h1>
 
 <h3>Requests by Servlet</h3>
-<div id="synapse_http_server_requests_servlet"></div>
+<div id="synapse_http_server_request_count_servlet"></div>
 <script>
 new PromConsole.Graph({
-  node: document.querySelector("#synapse_http_server_requests_servlet"),
-  expr: "rate(synapse_http_server_requests:servlet[2m])",
+  node: document.querySelector("#synapse_http_server_request_count_servlet"),
+  expr: "rate(synapse_http_server_request_count:servlet[2m])",
   name: "[[servlet]]",
   yAxisFormatter: PromConsole.NumberFormatter.humanize,
   yHoverFormatter: PromConsole.NumberFormatter.humanize,
@@ -215,11 +215,11 @@ new PromConsole.Graph({
 })
 </script>
 <h4>&nbsp;(without <tt>EventStreamRestServlet</tt> or <tt>SyncRestServlet</tt>)</h4>
-<div id="synapse_http_server_requests_servlet_minus_events"></div>
+<div id="synapse_http_server_request_count_servlet_minus_events"></div>
 <script>
 new PromConsole.Graph({
-  node: document.querySelector("#synapse_http_server_requests_servlet_minus_events"),
-  expr: "rate(synapse_http_server_requests:servlet{servlet!=\"EventStreamRestServlet\", servlet!=\"SyncRestServlet\"}[2m])",
+  node: document.querySelector("#synapse_http_server_request_count_servlet_minus_events"),
+  expr: "rate(synapse_http_server_request_count:servlet{servlet!=\"EventStreamRestServlet\", servlet!=\"SyncRestServlet\"}[2m])",
   name: "[[servlet]]",
   yAxisFormatter: PromConsole.NumberFormatter.humanize,
   yHoverFormatter: PromConsole.NumberFormatter.humanize,
@@ -233,7 +233,7 @@ new PromConsole.Graph({
 <script>
 new PromConsole.Graph({
   node: document.querySelector("#synapse_http_server_response_time_avg"),
-  expr: "rate(synapse_http_server_response_time:total[2m]) / rate(synapse_http_server_response_time:count[2m]) / 1000",
+  expr: "rate(synapse_http_server_response_time_seconds[2m]) / rate(synapse_http_server_response_count[2m]) / 1000",
   name: "[[servlet]]",
   yAxisFormatter: PromConsole.NumberFormatter.humanize,
   yHoverFormatter: PromConsole.NumberFormatter.humanize,
@@ -276,7 +276,7 @@ new PromConsole.Graph({
 <script>
 new PromConsole.Graph({
   node: document.querySelector("#synapse_http_server_response_ru_utime"),
-  expr: "rate(synapse_http_server_response_ru_utime:total[2m])",
+  expr: "rate(synapse_http_server_response_ru_utime_seconds[2m])",
   name: "[[servlet]]",
   yAxisFormatter: PromConsole.NumberFormatter.humanize,
   yHoverFormatter: PromConsole.NumberFormatter.humanize,
@@ -291,7 +291,7 @@ new PromConsole.Graph({
 <script>
 new PromConsole.Graph({
   node: document.querySelector("#synapse_http_server_response_db_txn_duration"),
-  expr: "rate(synapse_http_server_response_db_txn_duration:total[2m])",
+  expr: "rate(synapse_http_server_response_db_txn_duration_seconds[2m])",
   name: "[[servlet]]",
   yAxisFormatter: PromConsole.NumberFormatter.humanize,
   yHoverFormatter: PromConsole.NumberFormatter.humanize,
@@ -306,7 +306,7 @@ new PromConsole.Graph({
 <script>
 new PromConsole.Graph({
   node: document.querySelector("#synapse_http_server_send_time_avg"),
-  expr: "rate(synapse_http_server_response_time:total{servlet='RoomSendEventRestServlet'}[2m]) / rate(synapse_http_server_response_time:count{servlet='RoomSendEventRestServlet'}[2m]) / 1000",
+  expr: "rate(synapse_http_server_response_time_second{servlet='RoomSendEventRestServlet'}[2m]) / rate(synapse_http_server_response_count{servlet='RoomSendEventRestServlet'}[2m]) / 1000",
   name: "[[servlet]]",
   yAxisFormatter: PromConsole.NumberFormatter.humanize,
   yHoverFormatter: PromConsole.NumberFormatter.humanize,
diff --git a/contrib/prometheus/synapse-v1.rules b/contrib/prometheus/synapse-v1.rules
index b6f84174b0..4c900ba537 100644
--- a/contrib/prometheus/synapse-v1.rules
+++ b/contrib/prometheus/synapse-v1.rules
@@ -1,10 +1,10 @@
 synapse_federation_transaction_queue_pendingEdus:total = sum(synapse_federation_transaction_queue_pendingEdus or absent(synapse_federation_transaction_queue_pendingEdus)*0)
 synapse_federation_transaction_queue_pendingPdus:total = sum(synapse_federation_transaction_queue_pendingPdus or absent(synapse_federation_transaction_queue_pendingPdus)*0)
 
-synapse_http_server_requests:method{servlet=""} = sum(synapse_http_server_requests) by (method)
-synapse_http_server_requests:servlet{method=""} = sum(synapse_http_server_requests) by (servlet)
+synapse_http_server_request_count:method{servlet=""} = sum(synapse_http_server_request_count) by (method)
+synapse_http_server_request_count:servlet{method=""} = sum(synapse_http_server_request_count) by (servlet)
 
-synapse_http_server_requests:total{servlet=""} = sum(synapse_http_server_requests:by_method) by (servlet)
+synapse_http_server_request_count:total{servlet=""} = sum(synapse_http_server_request_count:by_method) by (servlet)
 
 synapse_cache:hit_ratio_5m = rate(synapse_util_caches_cache:hits[5m]) / rate(synapse_util_caches_cache:total[5m])
 synapse_cache:hit_ratio_30s = rate(synapse_util_caches_cache:hits[30s]) / rate(synapse_util_caches_cache:total[30s])
diff --git a/contrib/prometheus/synapse-v2.rules b/contrib/prometheus/synapse-v2.rules
index 07e37a885e..6ccca2daaf 100644
--- a/contrib/prometheus/synapse-v2.rules
+++ b/contrib/prometheus/synapse-v2.rules
@@ -5,19 +5,19 @@ groups:
     expr: "sum(synapse_federation_transaction_queue_pendingEdus or absent(synapse_federation_transaction_queue_pendingEdus)*0)"
   - record: "synapse_federation_transaction_queue_pendingPdus:total"
     expr:   "sum(synapse_federation_transaction_queue_pendingPdus or absent(synapse_federation_transaction_queue_pendingPdus)*0)"
-  - record: 'synapse_http_server_requests:method'
+  - record: 'synapse_http_server_request_count:method'
     labels:
       servlet: ""
-    expr: "sum(synapse_http_server_requests) by (method)"
-  - record: 'synapse_http_server_requests:servlet'
+    expr: "sum(synapse_http_server_request_count) by (method)"
+  - record: 'synapse_http_server_request_count:servlet'
     labels:
       method: ""
-    expr: 'sum(synapse_http_server_requests) by (servlet)'
+    expr: 'sum(synapse_http_server_request_count) by (servlet)'
 
-  - record: 'synapse_http_server_requests:total'
+  - record: 'synapse_http_server_request_count:total'
     labels:
       servlet: ""
-    expr: 'sum(synapse_http_server_requests:by_method) by (servlet)'
+    expr: 'sum(synapse_http_server_request_count:by_method) by (servlet)'
 
   - record: 'synapse_cache:hit_ratio_5m'
     expr: 'rate(synapse_util_caches_cache:hits[5m]) / rate(synapse_util_caches_cache:total[5m])'
diff --git a/docs/metrics-howto.rst b/docs/metrics-howto.rst
index 5e2d7c52ec..8acc479bc3 100644
--- a/docs/metrics-howto.rst
+++ b/docs/metrics-howto.rst
@@ -34,17 +34,6 @@ How to monitor Synapse metrics using Prometheus
    Restart prometheus.
 
 
-Deprecated metrics removed in 0.28.0
-------------------------------------
-
-Synapse 0.28.0 removes all of the metrics deprecated by 0.27.0, which are those
-listed under "Old name" below. This has been done to reduce the bandwidth used
-by gathering metrics and the storage requirements for the Prometheus server, as
-well as reducing CPU overhead for both Synapse and Prometheus.
-
-Administrators should update any alerts or monitoring dashboards to use the
-"New name" listed below.
-
 Block and response metrics renamed for 0.27.0
 ---------------------------------------------
 
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 9977be8831..c45142d38d 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -852,6 +852,14 @@ class RoomMemberMasterHandler(RoomMemberHandler):
     def _remote_join(self, requester, remote_room_hosts, room_id, user, content):
         """Implements RoomMemberHandler._remote_join
         """
+        # filter ourselves out of remote_room_hosts: do_invite_join ignores it
+        # and if it is the only entry we'd like to return a 404 rather than a
+        # 500.
+
+        remote_room_hosts = [
+            host for host in remote_room_hosts if host != self.hs.hostname
+        ]
+
         if len(remote_room_hosts) == 0:
             raise SynapseError(404, "No known servers")
 
diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py
index 87639b9151..00572c2897 100644
--- a/synapse/http/endpoint.py
+++ b/synapse/http/endpoint.py
@@ -12,8 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import socket
-
 from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
 from twisted.internet import defer, reactor
 from twisted.internet.error import ConnectError
@@ -33,7 +31,7 @@ SERVER_CACHE = {}
 
 # our record of an individual server which can be tried to reach a destination.
 #
-# "host" is actually a dotted-quad or ipv6 address string. Except when there's
+# "host" is the hostname acquired from the SRV record. Except when there's
 # no SRV record, in which case it is the original hostname.
 _Server = collections.namedtuple(
     "_Server", "priority weight host port expires"
@@ -297,20 +295,13 @@ def resolve_service(service_name, dns_client=client, cache=SERVER_CACHE, clock=t
 
             payload = answer.payload
 
-            hosts = yield _get_hosts_for_srv_record(
-                dns_client, str(payload.target)
-            )
-
-            for (ip, ttl) in hosts:
-                host_ttl = min(answer.ttl, ttl)
-
-                servers.append(_Server(
-                    host=ip,
-                    port=int(payload.port),
-                    priority=int(payload.priority),
-                    weight=int(payload.weight),
-                    expires=int(clock.time()) + host_ttl,
-                ))
+            servers.append(_Server(
+                host=str(payload.target),
+                port=int(payload.port),
+                priority=int(payload.priority),
+                weight=int(payload.weight),
+                expires=int(clock.time()) + answer.ttl,
+            ))
 
         servers.sort()
         cache[service_name] = list(servers)
@@ -328,81 +319,3 @@ def resolve_service(service_name, dns_client=client, cache=SERVER_CACHE, clock=t
             raise e
 
     defer.returnValue(servers)
-
-
-@defer.inlineCallbacks
-def _get_hosts_for_srv_record(dns_client, host):
-    """Look up each of the hosts in a SRV record
-
-    Args:
-        dns_client (twisted.names.dns.IResolver):
-        host (basestring): host to look up
-
-    Returns:
-        Deferred[list[(str, int)]]: a list of (host, ttl) pairs
-
-    """
-    ip4_servers = []
-    ip6_servers = []
-
-    def cb(res):
-        # lookupAddress and lookupIP6Address return a three-tuple
-        # giving the answer, authority, and additional sections of the
-        # response.
-        #
-        # we only care about the answers.
-
-        return res[0]
-
-    def eb(res, record_type):
-        if res.check(DNSNameError):
-            return []
-        logger.warn("Error looking up %s for %s: %s", record_type, host, res)
-        return res
-
-    # no logcontexts here, so we can safely fire these off and gatherResults
-    d1 = dns_client.lookupAddress(host).addCallbacks(
-        cb, eb, errbackArgs=("A", ))
-    d2 = dns_client.lookupIPV6Address(host).addCallbacks(
-        cb, eb, errbackArgs=("AAAA", ))
-    results = yield defer.DeferredList(
-        [d1, d2], consumeErrors=True)
-
-    # if all of the lookups failed, raise an exception rather than blowing out
-    # the cache with an empty result.
-    if results and all(s == defer.FAILURE for (s, _) in results):
-        defer.returnValue(results[0][1])
-
-    for (success, result) in results:
-        if success == defer.FAILURE:
-            continue
-
-        for answer in result:
-            if not answer.payload:
-                continue
-
-            try:
-                if answer.type == dns.A:
-                    ip = answer.payload.dottedQuad()
-                    ip4_servers.append((ip, answer.ttl))
-                elif answer.type == dns.AAAA:
-                    ip = socket.inet_ntop(
-                        socket.AF_INET6, answer.payload.address,
-                    )
-                    ip6_servers.append((ip, answer.ttl))
-                else:
-                    # the most likely candidate here is a CNAME record.
-                    # rfc2782 says srvs may not point to aliases.
-                    logger.warn(
-                        "Ignoring unexpected DNS record type %s for %s",
-                        answer.type, host,
-                    )
-                    continue
-            except Exception as e:
-                logger.warn("Ignoring invalid DNS response for %s: %s",
-                            host, e)
-                continue
-
-    # keep the ipv4 results before the ipv6 results, mostly to match historical
-    # behaviour.
-    defer.returnValue(ip4_servers + ip6_servers)
diff --git a/synapse/http/server.py b/synapse/http/server.py
index ac75206ef5..64e083ebfc 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -47,6 +47,17 @@ metrics = synapse.metrics.get_metrics_for(__name__)
 response_count = metrics.register_counter(
     "response_count",
     labels=["method", "servlet", "tag"],
+    alternative_names=(
+        # the following are all deprecated aliases for the same metric
+        metrics.name_prefix + x for x in (
+            "_requests",
+            "_response_time:count",
+            "_response_ru_utime:count",
+            "_response_ru_stime:count",
+            "_response_db_txn_count:count",
+            "_response_db_txn_duration:count",
+        )
+    )
 )
 
 requests_counter = metrics.register_counter(
@@ -62,24 +73,39 @@ outgoing_responses_counter = metrics.register_counter(
 response_timer = metrics.register_counter(
     "response_time_seconds",
     labels=["method", "servlet", "tag"],
+    alternative_names=(
+        metrics.name_prefix + "_response_time:total",
+    ),
 )
 
 response_ru_utime = metrics.register_counter(
     "response_ru_utime_seconds", labels=["method", "servlet", "tag"],
+    alternative_names=(
+        metrics.name_prefix + "_response_ru_utime:total",
+    ),
 )
 
 response_ru_stime = metrics.register_counter(
     "response_ru_stime_seconds", labels=["method", "servlet", "tag"],
+    alternative_names=(
+        metrics.name_prefix + "_response_ru_stime:total",
+    ),
 )
 
 response_db_txn_count = metrics.register_counter(
     "response_db_txn_count", labels=["method", "servlet", "tag"],
+    alternative_names=(
+        metrics.name_prefix + "_response_db_txn_count:total",
+    ),
 )
 
 # seconds spent waiting for db txns, excluding scheduling time, when processing
 # this request
 response_db_txn_duration = metrics.register_counter(
     "response_db_txn_duration_seconds", labels=["method", "servlet", "tag"],
+    alternative_names=(
+        metrics.name_prefix + "_response_db_txn_duration:total",
+    ),
 )
 
 # seconds spent waiting for a db connection, when processing this request
diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py
index c3d8237e8f..e4b5687a4b 100644
--- a/synapse/util/metrics.py
+++ b/synapse/util/metrics.py
@@ -31,28 +31,53 @@ metrics = synapse.metrics.get_metrics_for(__name__)
 block_counter = metrics.register_counter(
     "block_count",
     labels=["block_name"],
+    alternative_names=(
+        # the following are all deprecated aliases for the same metric
+        metrics.name_prefix + x for x in (
+            "_block_timer:count",
+            "_block_ru_utime:count",
+            "_block_ru_stime:count",
+            "_block_db_txn_count:count",
+            "_block_db_txn_duration:count",
+        )
+    )
 )
 
 block_timer = metrics.register_counter(
     "block_time_seconds",
     labels=["block_name"],
+    alternative_names=(
+        metrics.name_prefix + "_block_timer:total",
+    ),
 )
 
 block_ru_utime = metrics.register_counter(
     "block_ru_utime_seconds", labels=["block_name"],
+    alternative_names=(
+        metrics.name_prefix + "_block_ru_utime:total",
+    ),
 )
 
 block_ru_stime = metrics.register_counter(
     "block_ru_stime_seconds", labels=["block_name"],
+    alternative_names=(
+        metrics.name_prefix + "_block_ru_stime:total",
+    ),
 )
 
 block_db_txn_count = metrics.register_counter(
     "block_db_txn_count", labels=["block_name"],
+    alternative_names=(
+        metrics.name_prefix + "_block_db_txn_count:total",
+    ),
 )
 
 # seconds spent waiting for db txns, excluding scheduling time, in this block
 block_db_txn_duration = metrics.register_counter(
     "block_db_txn_duration_seconds", labels=["block_name"],
+    alternative_names=(
+        metrics.name_prefix + "_block_db_txn_duration:total",
+    ),
 )
 
 # seconds spent waiting for a db connection, in this block
diff --git a/tests/test_dns.py b/tests/test_dns.py
index d08b0f4333..af607d626f 100644
--- a/tests/test_dns.py
+++ b/tests/test_dns.py
@@ -33,8 +33,6 @@ class DnsTestCase(unittest.TestCase):
 
         service_name = "test_service.example.com"
         host_name = "example.com"
-        ip_address = "127.0.0.1"
-        ip6_address = "::1"
 
         answer_srv = dns.RRHeader(
             type=dns.SRV,
@@ -43,29 +41,9 @@ class DnsTestCase(unittest.TestCase):
             )
         )
 
-        answer_a = dns.RRHeader(
-            type=dns.A,
-            payload=dns.Record_A(
-                address=ip_address,
-            )
-        )
-
-        answer_aaaa = dns.RRHeader(
-            type=dns.AAAA,
-            payload=dns.Record_AAAA(
-                address=ip6_address,
-            )
-        )
-
         dns_client_mock.lookupService.return_value = defer.succeed(
             ([answer_srv], None, None),
         )
-        dns_client_mock.lookupAddress.return_value = defer.succeed(
-            ([answer_a], None, None),
-        )
-        dns_client_mock.lookupIPV6Address.return_value = defer.succeed(
-            ([answer_aaaa], None, None),
-        )
 
         cache = {}
 
@@ -74,13 +52,10 @@ class DnsTestCase(unittest.TestCase):
         )
 
         dns_client_mock.lookupService.assert_called_once_with(service_name)
-        dns_client_mock.lookupAddress.assert_called_once_with(host_name)
-        dns_client_mock.lookupIPV6Address.assert_called_once_with(host_name)
 
-        self.assertEquals(len(servers), 2)
+        self.assertEquals(len(servers), 1)
         self.assertEquals(servers, cache[service_name])
-        self.assertEquals(servers[0].host, ip_address)
-        self.assertEquals(servers[1].host, ip6_address)
+        self.assertEquals(servers[0].host, host_name)
 
     @defer.inlineCallbacks
     def test_from_cache_expired_and_dns_fail(self):