about summary refs log tree commit diff
path: root/LibMatrix/Extensions/UnicodeJsonEncoder.cs
diff options
context:
space:
mode:
Diffstat (limited to 'LibMatrix/Extensions/UnicodeJsonEncoder.cs')
-rw-r--r-- LibMatrix/Extensions/UnicodeJsonEncoder.cs 173
1 files changed, 173 insertions, 0 deletions
diff --git a/LibMatrix/Extensions/UnicodeJsonEncoder.cs b/LibMatrix/Extensions/UnicodeJsonEncoder.cs
new file mode 100644
index 0000000..ae58263
--- /dev/null
+++ b/LibMatrix/Extensions/UnicodeJsonEncoder.cs
@@ -0,0 +1,173 @@
+// LibMatrix: File sourced from https://github.com/dotnet/runtime/pull/87147/files under the MIT license.
+
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Text;
+using System.Text.Encodings.Web;
+
+namespace LibMatrix.Extensions;
+
+/// <summary>
+/// A <see cref="JavaScriptEncoder"/> that escapes only what RFC 8259, section 7 requires inside a JSON
+/// string: the quotation mark, the reverse solidus and the control characters U+0000 through U+001F.
+/// Every other scalar value, including non-ASCII text, is written through unescaped.
+/// </summary>
+internal sealed class UnicodeJsonEncoder : JavaScriptEncoder
+{
+    /// <summary>
+    /// Shared instance built with the default options (two-character escapes, lowercase hex digits).
+    /// </summary>
+    internal static readonly UnicodeJsonEncoder Singleton = new UnicodeJsonEncoder();
+
+    private readonly bool _preferHexEscape;
+    private readonly bool _preferUppercase;
+
+    /// <summary>
+    /// Creates an encoder that prefers two-character escapes and lowercase hex digits.
+    /// </summary>
+    public UnicodeJsonEncoder()
+        : this(preferHexEscape: false, preferUppercase: false)
+    {
+    }
+
+    /// <summary>
+    /// Creates an encoder with explicit escape-format preferences.
+    /// </summary>
+    /// <param name="preferHexEscape">When true, always emit <c>\uXXXX</c>, even where a two-character escape such as <c>\n</c> exists.</param>
+    /// <param name="preferUppercase">When true, write the hex digits of <c>\uXXXX</c> escapes as A-F instead of a-f.</param>
+    public UnicodeJsonEncoder(bool preferHexEscape, bool preferUppercase)
+    {
+        _preferHexEscape = preferHexEscape;
+        _preferUppercase = preferUppercase;
+    }
+
+    /// <summary>
+    /// Longest output this encoder produces for a single input char: the six chars of <c>\uXXXX</c>.
+    /// </summary>
+    public override int MaxOutputCharactersPerInputCharacter => 6;
+
+    /// <summary>
+    /// Returns the index of the first char in <paramref name="text"/> that must be escaped, or -1 if none does.
+    /// </summary>
+    public override unsafe int FindFirstCharacterToEncode(char* text, int textLength)
+    {
+        for (int index = 0; index < textLength; ++index)
+        {
+            if (NeedsEncoding(text[index]))
+            {
+                return index;
+            }
+        }
+
+        return -1;
+    }
+
+    /// <summary>
+    /// Writes <paramref name="unicodeScalar"/> to <paramref name="buffer"/>, escaping it when <see cref="WillEncode"/> requires it.
+    /// </summary>
+    /// <returns>false, with zero characters written, when the buffer is too small; otherwise true.</returns>
+    public override unsafe bool TryEncodeUnicodeScalar(int unicodeScalar, char* buffer, int bufferLength, out int numberOfCharactersWritten)
+    {
+        if (!WillEncode(unicodeScalar))
+        {
+            // Nothing to escape: copy the scalar through as UTF-16 (two chars for supplementary values).
+            Span<char> destination = new Span<char>(buffer, bufferLength);
+            return new Rune(unicodeScalar).TryEncodeToUtf16(destination, out numberOfCharactersWritten);
+        }
+
+        // WillEncode only accepts values inside the char range, so this cast cannot truncate.
+        char value = (char)unicodeScalar;
+        bool useShortEscape = !_preferHexEscape && HasTwoCharacterEscape(value);
+        int required = useShortEscape ? 2 : 6;
+
+        if (bufferLength < required)
+        {
+            numberOfCharactersWritten = 0;
+            return false;
+        }
+
+        buffer[0] = '\\';
+
+        if (useShortEscape)
+        {
+            buffer[1] = GetTwoCharacterEscapeSuffix(value);
+        }
+        else
+        {
+            // Derive all four hex digits from the value instead of assuming the high byte is zero,
+            // so the escape stays correct if NeedsEncoding ever covers characters above U+00FF.
+            buffer[1] = 'u';
+            buffer[2] = ToHexDigit((value >> 12) & 0xf, _preferUppercase);
+            buffer[3] = ToHexDigit((value >> 8) & 0xf, _preferUppercase);
+            buffer[4] = ToHexDigit((value >> 4) & 0xf, _preferUppercase);
+            buffer[5] = ToHexDigit(value & 0xf, _preferUppercase);
+        }
+
+        numberOfCharactersWritten = required;
+        return true;
+    }
+
+    /// <summary>
+    /// Reports whether <paramref name="unicodeScalar"/> is escaped by this encoder.
+    /// </summary>
+    public override bool WillEncode(int unicodeScalar)
+    {
+        // Range-check both ends before casting: a negative value would otherwise wrap into the
+        // char range (for example -65502 becomes '"') and be reported as needing an escape.
+        if (unicodeScalar < 0 || unicodeScalar > char.MaxValue)
+        {
+            return false;
+        }
+
+        return NeedsEncoding((char)unicodeScalar);
+    }
+
+    /// <summary>
+    /// True for the characters RFC 8259 forbids unescaped in a string: '"', '\' and U+0000 through U+001F.
+    /// See https://datatracker.ietf.org/doc/html/rfc8259#section-7
+    /// </summary>
+    private static bool NeedsEncoding(char value) => value is '"' or '\\' or <= '\u001f';
+
+    /// <summary>
+    /// True when <paramref name="value"/> has a two-character escape in the RFC 8259, Section 7 "char" rule.
+    /// </summary>
+    private static bool HasTwoCharacterEscape(char value)
+        => value is '"' or '\\' or '/' or '\b' or '\f' or '\n' or '\r' or '\t';
+
+    /// <summary>
+    /// Returns the char that follows the backslash in the two-character escape of <paramref name="value"/>.
+    /// </summary>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="value"/> has no two-character escape.</exception>
+    private static char GetTwoCharacterEscapeSuffix(char value) => value switch
+    {
+        '"' => '"',
+        '\\' => '\\',
+        '/' => '/',
+        '\b' => 'b',
+        '\f' => 'f',
+        '\n' => 'n',
+        '\r' => 'r',
+        '\t' => 't',
+        _ => throw new ArgumentOutOfRangeException(nameof(value)),
+    };
+
+    /// <summary>
+    /// Converts a nibble (0-15) to its hexadecimal digit, in the requested letter case.
+    /// </summary>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="value"/> is outside 0-15.</exception>
+    private static char ToHexDigit(int value, bool uppercase)
+    {
+        if (value < 0 || value > 0xf)
+        {
+            throw new ArgumentOutOfRangeException(nameof(value));
+        }
+
+        if (value < 10)
+        {
+            return (char)('0' + value);
+        }
+
+        return (char)((uppercase ? 'A' : 'a') + (value - 0xa));
+    }
+}
\ No newline at end of file