diff options
author | BulbyVR <26726264+TheDrawingCoder-Gamer@users.noreply.github.com> | 2022-04-24 12:14:23 -0400 |
---|---|---|
committer | BulbyVR <26726264+TheDrawingCoder-Gamer@users.noreply.github.com> | 2022-04-24 12:25:01 -0400 |
commit | 97105ebc5f1c04471cc8e9468c03973f9a4e502e (patch) | |
tree | 642001224152bb2b019cfc2919e3f4027be09765 /scripts | |
parent | [skip ci] update gitignore (diff) | |
download | nheko-97105ebc5f1c04471cc8e9468c03973f9a4e502e.tar.xz |
More codes and rules
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/emoji_codegen.py | 96 |
1 files changed, 78 insertions, 18 deletions
diff --git a/scripts/emoji_codegen.py b/scripts/emoji_codegen.py index 9dae1544..f2a6ba98 100755 --- a/scripts/emoji_codegen.py +++ b/scripts/emoji_codegen.py @@ -25,7 +25,37 @@ const QVector<Emoji> emoji::Provider::emoji = { ''') d = dict(kwargs=kwargs) print(tmpl.render(d)) - +# FIXME: Stop this madness +def humanize_keypad(num): + match num: + case "0": + return "zero" + case "1": + return "one" + case "2": + return "two" + case "3": + return "three" + case "4": + return "four" + case "5": + return "five" + case "6": + return "six" + case "7": + return "seven" + case "8": + return "eight" + case "9": + return "nine" + case "10": + return "ten" + case "*": + return "asterisk" + case "#": + return "hash" + case _: + return None if __name__ == '__main__': if len(sys.argv) < 3: print('usage: emoji_codegen.py /path/to/emoji-test.txt /path/to/shortcodes.txt') @@ -52,7 +82,8 @@ if __name__ == '__main__': 'Activities': activity, 'Objects': objects, 'Symbols': symbols, - 'Flags': flags + 'Flags': flags, + 'Component': symbols } shortcodeDict = {} # for my sanity - this strips newlines @@ -74,34 +105,63 @@ if __name__ == '__main__': code, qualification, charAndName = segments # skip unqualified versions of same unicode - if qualification != 'fully-qualified': + if qualification != 'fully-qualified' and qualification != 'component' : continue - + char, name = re.match(r'^(\S+) E\d+\.\d+ (.*)$', charAndName).groups() shortname = name - + + # discard skin tone variants for sanity + # __contains__ is so stupid i hate prototype languages + if name.__contains__("skin tone") and qualification != 'component': + continue + if qualification == 'component' and not name.__contains__("skin tone"): + continue #TODO: Handle skintone modifiers in a sane way if shortname in shortcodeDict: shortname = shortcodeDict[shortname] - else: + else: + shortname = shortname.lower() + if shortname.endswith(' (blood type)'): + shortname = shortname[:-13] + if shortname.endswith(': red hair'): + shortname = "red_haired_" + shortname[:-10] + if shortname.endswith(': curly hair'): + shortname = "curly_haired_" + shortname[:-12] + if shortname.endswith(': white hair'): + shortname = "white_haried_" + shortname[:-12] + if shortname.endswith(': bald'): + shortname = "bald_" + shortname[:-6] + if shortname.endswith(': beard'): + shortname = "bearded_" + shortname[:-7] if shortname.endswith(' face'): shortname = shortname[:-5] - elif shortname.endswith(' button'): + if shortname.endswith(' button'): shortname = shortname[:-7] - else: - # FIXME: Is there a better way to do this? - matchobj = re.match(r'^flag: (.*)$', shortname) - if matchobj: - country, = matchobj.groups() - shortname = country + " flag" - shortname = shortname.replace(" ", "_") - shortname = shortname.replace("“", "") - shortname = shortname.replace("”", "") - shortname = shortname.replace(":", "") + if shortname.endswith(' banknote'): + shortname = shortname[:-9] + keycapmtch = re.match(r'^keycap: (.+)$', shortname) + if keycapmtch: + keycapthing, = keycapmtch.groups() + type(keycapthing) + num_name = humanize_keypad(keycapthing) + if num_name: + shortname = num_name + else: + raise Exception("incomplete keycap " + keycapthing + ", fix ur code") + + # FIXME: Is there a better way to do this? + matchobj = re.match(r'^flag: (.*)$', shortname) + if matchobj: + country, = matchobj.groups() + shortname = country + " flag" + shortname = shortname.replace("u.s.", "us") + shortname = shortname.replace("&", "and") shortname = shortname.replace("-", "_") + shortname, = re.match(r'^_*(.+)_*$', shortname).groups() + shortname = re.sub(r'\W', '_', shortname) shortname = re.sub(r'_{2,}', '_', shortname) - shortname = shortname.lower() shortname = unidecode(shortname) categories[current_category].append(Emoji(code, shortname, name)) |