diff options
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/codegen.sh | 7 | ||||
-rwxr-xr-x | scripts/emoji_codegen.py | 83 | ||||
-rw-r--r-- | scripts/update_emoji.md | 4 |
3 files changed, 79 insertions, 15 deletions
diff --git a/scripts/codegen.sh b/scripts/codegen.sh new file mode 100644 index 00000000..9ba5c5df --- /dev/null +++ b/scripts/codegen.sh @@ -0,0 +1,7 @@ +#!/bin/bash +ROOT=$(realpath "$PWD/$(dirname "$0")/..") +cd $ROOT +cat resources/provider-header.txt > src/emoji/Provider.cpp +cat resources/extra_emoji.txt resources/emoji-test.txt > resources/complete-emoji.txt +scripts/emoji_codegen.py resources/complete-emoji.txt resources/shortcodes.txt >> src/emoji/Provider.cpp +cd - > /dev/null diff --git a/scripts/emoji_codegen.py b/scripts/emoji_codegen.py index 700cc3e1..c52189dc 100755 --- a/scripts/emoji_codegen.py +++ b/scripts/emoji_codegen.py @@ -2,14 +2,15 @@ import sys import re - +from unidecode import unidecode from jinja2 import Template class Emoji(object): - def __init__(self, code, shortname): + def __init__(self, code, shortname, unicodename): self.code = ''.join(['\\U'+c.rjust(8, '0') for c in code.strip().split(' ')]) self.shortname = shortname + self.unicodename = unicodename def generate_qml_list(**kwargs): tmpl = Template(''' @@ -17,20 +18,20 @@ const QVector<Emoji> emoji::Provider::emoji = { {%- for c in kwargs.items() %} // {{ c[0].capitalize() }} {%- for e in c[1] %} - Emoji{QStringLiteral(u"{{ e.code }}"), QStringLiteral(u"{{ e.shortname }}"), emoji::Emoji::Category::{{ c[0].capitalize() }}}, + Emoji{QStringLiteral(u"{{ e.code }}"), QStringLiteral(u"{{ e.shortname }}"), QStringLiteral(u"{{ e.unicodename }}"), emoji::Emoji::Category::{{ c[0].capitalize() }}}, {%- endfor %} {%- endfor %} }; ''') d = dict(kwargs=kwargs) print(tmpl.render(d)) - if __name__ == '__main__': - if len(sys.argv) < 2: - print('usage: emoji_codegen.py /path/to/emoji-test.txt') + if len(sys.argv) < 3: + print('usage: emoji_codegen.py /path/to/emoji-test.txt /path/to/shortcodes.txt') sys.exit(1) filename = sys.argv[1] + shortcodefilename = sys.argv[2] people = [] nature = [] @@ -50,9 +51,14 @@ if __name__ == '__main__': 'Activities': activity, 'Objects': objects, 'Symbols': symbols, - 'Flags': flags + 'Flags': flags, + 'Component': symbols } - + shortcodeDict = {} + # for my sanity - this strips newlines + for line in open(shortcodefilename, 'r', encoding="utf8"): + longname, shortname = line.strip().split(':') + shortcodeDict[longname] = shortname current_category = '' for line in open(filename, 'r', encoding="utf8"): if line.startswith('# group:'): @@ -68,16 +74,65 @@ if __name__ == '__main__': code, qualification, charAndName = segments # skip unqualified versions of same unicode - if qualification == 'unqualified': - continue - - if qualification == 'component': + if qualification != 'fully-qualified': continue + char, name = re.match(r'^(\S+) E\d+\.\d+ (.*)$', charAndName).groups() - - categories[current_category].append(Emoji(code, name)) + shortname = name + # until skin tone is handled, keep them around + # discard skin tone variants for sanity + # __contains__ is so stupid i hate prototype languages + # if name.__contains__("skin tone") and qualification != 'component': + # continue + # if qualification == 'component' and not name.__contains__("skin tone"): + # continue + #TODO: Handle skintone modifiers in a sane way + basicallyTheSame = False + if code in shortcodeDict: + shortname = shortcodeDict[code] + else: + shortname = shortname.lower() + if shortname.endswith(' (blood type)'): + shortname = shortname[:-13] + if shortname.endswith(': red hair'): + shortname = "red_haired_" + shortname[:-10] + if shortname.endswith(': curly hair'): + shortname = "curly_haired_" + shortname[:-12] + if shortname.endswith(': white hair'): + shortname = "white_haried_" + shortname[:-12] + if shortname.endswith(': bald'): + shortname = "bald_" + shortname[:-6] + if shortname.endswith(': beard'): + shortname = "bearded_" + shortname[:-7] + if shortname.endswith(' face'): + shortname = shortname[:-5] + if shortname.endswith(' button'): + shortname = shortname[:-7] + if shortname.endswith(' banknote'): + shortname = shortname[:-9] + + # FIXME: Is there a better way to do this? + matchobj = re.match(r'^flag: (.*)$', shortname) + if shortname.startswith("flag: "): + country = shortname[5:] + shortname = country + " flag" + shortname = shortname.replace("u.s.", "us") + shortname = shortname.replace("&", "and") + + if shortname == name.lower(): + basicallyTheSame = True + + shortname = shortname.replace("-", "_") + shortname = re.sub(r'\W', '_', shortname) + shortname, = re.match(r'^_*(.+)_*$', shortname).groups() + shortname = re.sub(r'_{2,}', '_', shortname) + shortname = unidecode(shortname) + # if basicallyTheSame: + # shortname = "" + categories[current_category].append(Emoji(code, shortname, name)) # Use xclip to pipe the output to clipboard. # e.g ./codegen.py emoji.json | xclip -sel clip + # alternatively - delete the var from src/emoji/Provider.cpp, and do ./codegen.py emojis shortcodes >> src/emoji/Provider.cpp generate_qml_list(people=people, nature=nature, food=food, activity=activity, travel=travel, objects=objects, symbols=symbols, flags=flags) diff --git a/scripts/update_emoji.md b/scripts/update_emoji.md index 00fe8c4e..fae6d089 100644 --- a/scripts/update_emoji.md +++ b/scripts/update_emoji.md @@ -2,6 +2,8 @@ 1. Get the latest emoji-test.txt from here: https://unicode.org/Public/emoji/ 2. Overwrite the existing resources/emoji-test.txt with the new one -3. Run `./scripts/emoji_codegen.py resources/emoji-test.txt` and replace the current tail of src/emoji/Provider.cpp with the new output +3. Run `./scripts/emoji_codegen.py resources/emoji-test.txt resources/shortcodes.txt` and replace the current tail of src/emoji/Provider.cpp with the new output 4. `make lint` 5. Compile and test + + |