summary refs log tree commit diff
path: root/src/Config.h
diff options
context:
space:
mode:
author Lukas Mai <l.mai@web.de> 2023-02-01 21:14:14 +0100
committer Lukas Mai <l.mai@web.de> 2023-02-01 21:14:14 +0100
commit b9517e588b365436ebab14458553b8c2f799636c (patch)
tree 14d662ad3c040387ec26b924c900617d1fc49829 /src/Config.h
parent Make notifications show the window again (diff)
download nheko-b9517e588b365436ebab14458553b8c2f799636c.tar.xz
Allow nested ()/[] brackets in URLs (fixes #1346)
Diffstat (limited to '')
-rw-r--r-- src/Config.h 50
1 files changed, 45 insertions, 5 deletions
diff --git a/src/Config.h b/src/Config.h
index ba9564f0..6cf6687e 100644
--- a/src/Config.h
+++ b/src/Config.h
@@ -26,11 +26,51 @@ constexpr auto LABEL_MEDIUM_SIZE_RATIO = 1.3;
 namespace strings {
 const QString url_html = QStringLiteral("<a href=\"\\1\">\\1</a>");
 const QRegularExpression url_regex(
-  // match an URL, that is not quoted, i.e.
-  // vvvvvv match quote via negative lookahead/lookbehind                              vv
-  //          vvvv atomic match url -> fail if there is a " before or after        vvv
-  QStringLiteral(
-    R"((?<!["'])(?>((www\.(?!\.)|[a-z][a-z0-9+.-]*://)[^\s<>'"]+[^!,\.\s<>'"\]\)\:]))(?!["']))"));
+    // match an unquoted URL; the pattern is assembled piece by piece from the named fragments below
+    [](){ // lambda invoked in place (see its closing line); the string it returns is the pattern
+        const auto
+            general_unicode = QStringLiteral(R"((?:[^\x{0}-\x{7f}\p{Cc}\s\p{P}]|[\x{2010}\x{2011}\x{2012}\x{2013}\x{2014}\x{2015}]))"), // one non-ASCII char that is not control, whitespace or punctuation, or one of U+2010..U+2015
+            protocol = QStringLiteral(R"((?:[Hh][Tt][Tt][Pp][Ss]?))"), // http or https, in any letter case
+            unreserved_subdelims_colon = QStringLiteral(R"([a-zA-Z0-9\-._~!$&'()*+,;=:])"), // one char: the RFC 3986 unreserved and sub-delims sets plus the colon
+            pct_enc = QStringLiteral(R"((?:%[[:xdigit:]]{2}))"), // percent sign followed by exactly two hex digits
+            userinfo = "(?:" + unreserved_subdelims_colon + "*(?:" + pct_enc + unreserved_subdelims_colon + "*)*)", // any mix of the chars above and percent escapes; may be empty
+            dec_octet = QStringLiteral(R"((?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))"), // decimal 0..255 without leading zeros
+            ipv4_addr = "(?:" + dec_octet + R"((?:\.)" + dec_octet + "){3})", // four dot-separated dec_octets
+            h16 = QStringLiteral(R"((?:[[:xdigit:]]{1,4}))"), // one to four hex digits
+            ls32 = "(?:" + h16 + ":" + h16 + "|" + ipv4_addr + ")", // either h16:h16 or an embedded IPv4 address
+            ipv6_addr = "(?:" // one alternative per line below; NOTE(review): appears to mirror the IPv6address grammar of RFC 3986 - confirm
+                "(?:" + h16 + ":){6}" + ls32
+                + "|" "::(?:" + h16 + ":){5}" + ls32
+                + "|" + h16 + "?::(?:" + h16 + ":){4}" + ls32
+                + "|" "(?:" + h16 + "(?::" + h16 + "){0,1})?::(?:" + h16 + ":){3}" + ls32
+                + "|" "(?:" + h16 + "(?::" + h16 + "){0,2})?::(?:" + h16 + ":){2}" + ls32
+                + "|" "(?:" + h16 + "(?::" + h16 + "){0,3})?::" + h16 + ":" + ls32
+                + "|" "(?:" + h16 + "(?::" + h16 + "){0,4})?::" + ls32
+                + "|" "(?:" + h16 + "(?::" + h16 + "){0,5})?::" + h16
+                + "|" "(?:" + h16 + "(?::" + h16 + "){0,6})?::"
+            ")",
+            ipvfuture = R"((?:v[[:xdigit:]]+\.)" + unreserved_subdelims_colon + "+)", // letter v, hex digits, a dot, then one or more of the chars above
+            ip_literal = R"((?:\[(?:)" + ipv6_addr + "|" + ipvfuture + R"()\]))", // ipv6_addr or ipvfuture enclosed in square brackets
+            host_alnum = "(?:[a-zA-Z0-9]|" + general_unicode + ")", // ASCII alphanumeric or a general_unicode char
+            host_label = "(?:" + host_alnum + "+(?:-+" + host_alnum + "+)*)", // runs of host_alnum joined by hyphens; never starts or ends with a hyphen
+            hostname = "(?:" + host_label + R"((?:\.)" + host_label + R"()*\.?))", // dot-separated labels with an optional trailing dot
+            host = "(?:" + hostname + "|" + ip_literal + ")", // hostname or bracketed IP literal (ipv4_addr is only used inside ls32)
+            path = R"((?:/((?:[a-zA-Z0-9\-._~!$&'*+,;=:@/]|)" + pct_enc + R"(|\((?-1)\)|)" + general_unicode + ")*))", // slash, then a capture group that recurses into itself via (?-1) so parentheses only match as balanced pairs
+            query = R"(((?:[a-zA-Z0-9\-._~!$&'*+,;=:@/?\\{}]|)" + pct_enc + R"(|\((?-1)\)|\[(?-1)\]|)" + general_unicode + ")*)", // like the path body, but also allows question mark, backslash, braces and balanced square brackets
+            fragment = query; // the fragment part reuses the query pattern unchanged
+        return // final pattern: lookbehind, atomic group wrapping capture group 1 (the whole URL), trailing lookbehind and lookahead
+            R"((?<!["'\w])(?>()"
+            + protocol + "://"
+            + "(?:" + userinfo + "@)?"
+            + host + "(?::[0-9]+)?"
+            + path + "?"
+            R"((?:\?)" + query + ")?"
+            R"((?:#)" + fragment + ")?"
+            "(?<![.!?,;:'])"
+            R"())(?!["']))";
+    }(),
+    QRegularExpression::UseUnicodePropertiesOption
+);
 // A matrix link to be converted back to markdown
 static const QRegularExpression
   matrixToLink(QStringLiteral(R"(<a href=\"(https://matrix.to/#/.*?)\">(.*?)</a>)"));