diff --git a/scripts/emoji_codegen.py b/scripts/emoji_codegen.py
index cfa72425..634887b2 100755
--- a/scripts/emoji_codegen.py
+++ b/scripts/emoji_codegen.py
@@ -1,31 +1,19 @@
#!/usr/bin/env python3
import sys
-import json
+import re
from jinja2 import Template
class Emoji(object):
- def __init__(self, code, shortname, category, order):
- self.code = ''.join(list(map(code_to_bytes, code.split('-'))))
+ def __init__(self, code, shortname):
+ self.code = repr(code.encode('utf-8'))[1:].strip("'")
self.shortname = shortname
- self.category = category
- self.order = int(order)
-
-
-def code_to_bytes(codepoint):
- '''
- Convert hex unicode codepoint to hex byte array.
- '''
- bytes = chr(int(codepoint, 16)).encode('utf-8')
-
- return str(bytes)[1:].strip("'")
-
def generate_code(emojis, category):
tmpl = Template('''
-const QList<Emoji> EmojiProvider::{{ category }} = {
+const std::vector<Emoji> emoji::Provider::{{ category }} = {
{%- for e in emoji %}
Emoji{QString::fromUtf8("{{ e.code }}"), "{{ e.shortname }}"},
{%- endfor %}
@@ -38,44 +26,56 @@ const QList<Emoji> EmojiProvider::{{ category }} = {
if __name__ == '__main__':
if len(sys.argv) < 2:
- print('usage: emoji_codegen.py /path/to/emoji.json')
+ print('usage: emoji_codegen.py /path/to/emoji-test.txt')
sys.exit(1)
filename = sys.argv[1]
- data = {}
- with open(filename, 'r') as filename:
- data = json.loads(filename.read())
+ people = []
+ nature = []
+ food = []
+ activity = []
+ travel = []
+ objects = []
+ symbols = []
+ flags = []
+
+ categories = {
+ 'Smileys & Emotion': people,
+ 'People & Body': people,
+ 'Animals & Nature': nature,
+ 'Food & Drink': food,
+ 'Travel & Places': travel,
+ 'Activities': activity,
+ 'Objects': objects,
+ 'Symbols': symbols,
+ 'Flags': flags
+ }
+
+ current_category = ''
+ for line in open(filename, 'r'):
+ if line.startswith('# group:'):
+ current_category = line.split(':', 1)[1].strip()
+
+ if not line or line.startswith('#'):
+ continue
- emojis = []
+ segments = re.split(r'\s+[#;] ', line.strip())
+ if len(segments) != 3:
+ continue
- for emoji_name in data:
- tmp = data[emoji_name]
+ code, qualification, charAndName = segments
- l = len(tmp['unicode'].split('-'))
+ # skip sequences ending in U+FE0F (variation selector-16), i.e. fully-qualified duplicates of the same emoji
+ if code.endswith('FE0F'):
+ continue
- if l > 1 and tmp['category'] == 'people':
+ if qualification == 'component':
continue
- emojis.append(
- Emoji(
- tmp['unicode'],
- tmp['shortname'],
- tmp['category'],
- tmp['emoji_order']
- )
- )
-
- emojis.sort(key=lambda x: x.order)
-
- people = list(filter(lambda x: x.category == "people", emojis))
- nature = list(filter(lambda x: x.category == "nature", emojis))
- food = list(filter(lambda x: x.category == "food", emojis))
- activity = list(filter(lambda x: x.category == "activity", emojis))
- travel = list(filter(lambda x: x.category == "travel", emojis))
- objects = list(filter(lambda x: x.category == "objects", emojis))
- symbols = list(filter(lambda x: x.category == "symbols", emojis))
- flags = list(filter(lambda x: x.category == "flags", emojis))
+ char, name = re.match(r'^(\S+) E\d+\.\d+ (.*)$', charAndName).groups()
+
+ categories[current_category].append(Emoji(char, name))
# Use xclip to pipe the output to clipboard.
# e.g ./codegen.py emoji.json | xclip -sel clip
diff --git a/scripts/update_emoji.md b/scripts/update_emoji.md
new file mode 100644
index 00000000..00fe8c4e
--- /dev/null
+++ b/scripts/update_emoji.md
@@ -0,0 +1,7 @@
+# Updating emoji
+
+1. Download the latest `emoji-test.txt` from https://unicode.org/Public/emoji/ (look in the directory of the newest emoji version)
+2. Overwrite the existing resources/emoji-test.txt with the new one
+3. Run `./scripts/emoji_codegen.py resources/emoji-test.txt` and replace the generated `emoji::Provider::<category>` vector definitions at the end of `src/emoji/Provider.cpp` with the script's output
+4. Run `make lint`
+5. Compile and test
|