More codes and rules

author: BulbyVR <26726264+TheDrawingCoder-Gamer@users.noreply.github.com> 2022-04-24 12:14:23 -0400
committer: BulbyVR <26726264+TheDrawingCoder-Gamer@users.noreply.github.com> 2022-04-24 12:25:01 -0400
commit: 97105ebc5f1c04471cc8e9468c03973f9a4e502e (patch)
tree: 642001224152bb2b019cfc2919e3f4027be09765 /scripts
parent: [skip ci] update gitignore (diff)
download: nheko-97105ebc5f1c04471cc8e9468c03973f9a4e502e.tar.xz
1 files changed, 78 insertions, 18 deletions
diff --git a/scripts/emoji_codegen.py b/scripts/emoji_codegen.py
index 9dae1544..f2a6ba98 100755
--- a/scripts/emoji_codegen.py
+++ b/scripts/emoji_codegen.py
@@ -25,7 +25,37 @@ const QVector<Emoji> emoji::Provider::emoji = {
     ''')
     d = dict(kwargs=kwargs)
     print(tmpl.render(d))
-
+# Keycap label -> spelled-out name; a dict lookup keeps this helper free of the 3.10+ `match` statement.
+_KEYCAP_NAMES = {
+    "0": "zero",
+    "1": "one",
+    "2": "two",
+    "3": "three",
+    "4": "four",
+    "5": "five",
+    "6": "six",
+    "7": "seven",
+    "8": "eight",
+    "9": "nine",
+    "10": "ten",
+    "*": "asterisk",
+    "#": "hash",
+}
+
+
+def humanize_keypad(num):
+    """Spell out a keycap label for use in an emoji shortcode.
+
+    Args:
+        num: The text captured after "keycap: " in the emoji name, e.g. "1" or "#".
+
+    Returns:
+        The English word for the label ("one", "hash", ...), or None when
+        the label is not in the table.
+    """
+    return _KEYCAP_NAMES.get(num)
+
+
 if __name__ == '__main__':
     if len(sys.argv) < 3:
         print('usage: emoji_codegen.py /path/to/emoji-test.txt /path/to/shortcodes.txt')
@@ -52,7 +82,8 @@ if __name__ == '__main__':
         'Activities': activity,
         'Objects': objects,
         'Symbols': symbols,
-        'Flags': flags
+        'Flags': flags,
+        'Component': symbols
     }
     shortcodeDict = {} 
     # for my sanity - this strips newlines
@@ -74,34 +105,63 @@ if __name__ == '__main__':
         code, qualification, charAndName = segments
 
         # skip unqualified versions of same unicode
-        if qualification != 'fully-qualified':
+        if qualification != 'fully-qualified' and qualification != 'component' :
             continue
-
+        
 
         char, name = re.match(r'^(\S+) E\d+\.\d+ (.*)$', charAndName).groups()
         shortname = name
-
+        
+        # discard skin tone variants for sanity
+        # __contains__ is so stupid i hate prototype languages
+        if name.__contains__("skin tone") and qualification != 'component': 
+            continue
+        if qualification == 'component' and not name.__contains__("skin tone"): 
+            continue
         #TODO: Handle skintone modifiers in a sane way
         if shortname in shortcodeDict: 
             shortname = shortcodeDict[shortname]
-        else: 
+        else:
+            shortname = shortname.lower()
+            if shortname.endswith(' (blood type)'): 
+                shortname = shortname[:-13]
+            if shortname.endswith(': red hair'): 
+                shortname = "red_haired_" + shortname[:-10]
+            if shortname.endswith(': curly hair'): 
+                shortname = "curly_haired_" + shortname[:-12]
+            if shortname.endswith(': white hair'): 
+                shortname = "white_haried_" + shortname[:-12]
+            if shortname.endswith(': bald'): 
+                shortname = "bald_" + shortname[:-6]
+            if shortname.endswith(': beard'): 
+                shortname = "bearded_" + shortname[:-7]
             if shortname.endswith(' face'): 
                 shortname = shortname[:-5]
-            elif shortname.endswith(' button'): 
+            if shortname.endswith(' button'): 
                 shortname = shortname[:-7] 
-            else: 
-                # FIXME: Is there a better way to do this?
-                matchobj = re.match(r'^flag: (.*)$', shortname) 
-                if matchobj: 
-                    country, = matchobj.groups() 
-                    shortname = country + " flag"
-            shortname = shortname.replace(" ", "_")
-            shortname = shortname.replace("“", "")
-            shortname = shortname.replace("”", "")
-            shortname = shortname.replace(":", "")
+            if shortname.endswith(' banknote'): 
+                shortname = shortname[:-9]
+            keycapmtch = re.match(r'^keycap: (.+)$', shortname)
+            if keycapmtch: 
+                keycapthing, = keycapmtch.groups()
+                type(keycapthing)
+                num_name = humanize_keypad(keycapthing) 
+                if num_name: 
+                    shortname = num_name
+                else: 
+                    raise Exception("incomplete keycap " + keycapthing + ", fix ur code")
+                
+            # FIXME: Is there a better way to do this?
+            matchobj = re.match(r'^flag: (.*)$', shortname) 
+            if matchobj: 
+                country, = matchobj.groups() 
+                shortname = country + " flag"
+            shortname = shortname.replace("u.s.", "us")
+            shortname = shortname.replace("&", "and")
             shortname = shortname.replace("-", "_")
+            shortname, = re.match(r'^_*(.+)_*$', shortname).groups()
+            shortname = re.sub(r'\W', '_', shortname) 
             shortname = re.sub(r'_{2,}', '_', shortname) 
-            shortname = shortname.lower()
             shortname = unidecode(shortname)
         categories[current_category].append(Emoji(code, shortname, name))
author	BulbyVR <26726264+TheDrawingCoder-Gamer@users.noreply.github.com>	2022-04-24 12:14:23 -0400
committer	BulbyVR <26726264+TheDrawingCoder-Gamer@users.noreply.github.com>	2022-04-24 12:25:01 -0400
commit	97105ebc5f1c04471cc8e9468c03973f9a4e502e (patch)
tree	642001224152bb2b019cfc2919e3f4027be09765 /scripts
parent	[skip ci] update gitignore (diff)
download	nheko-97105ebc5f1c04471cc8e9468c03973f9a4e502e.tar.xz