2 files changed, 34 insertions, 9 deletions
diff --git a/scripts/emoji_codegen.py b/scripts/emoji_codegen.py
index 700cc3e1..37ad3cec 100755
--- a/scripts/emoji_codegen.py
+++ b/scripts/emoji_codegen.py
@@ -2,7 +2,7 @@
 
 import sys
 import re
-
+from unidecode import unidecode
 from jinja2 import Template
 
 
@@ -26,11 +26,12 @@ const QVector<Emoji> emoji::Provider::emoji = {
     print(tmpl.render(d))
 
 if __name__ == '__main__':
-    if len(sys.argv) < 2:
-        print('usage: emoji_codegen.py /path/to/emoji-test.txt')
+    if len(sys.argv) < 3:
+        print('usage: emoji_codegen.py /path/to/emoji-test.txt /path/to/shortcodes.txt')
         sys.exit(1)
 
     filename = sys.argv[1]
+    shortcodefilename = sys.argv[2]
 
     people = []
     nature = []
@@ -52,7 +53,11 @@ if __name__ == '__main__':
         'Symbols': symbols,
         'Flags': flags
     }
-
+    shortcodeDict = {} 
+    # Load the longname:shortname overrides, one pair per line; strip() drops the trailing newline
+    for line in open(shortcodefilename, 'r', encoding="utf8"): 
+        longname, shortname = line.strip().split(':')
+        shortcodeDict[longname] = shortname
     current_category = ''
     for line in open(filename, 'r', encoding="utf8"):
         if line.startswith('# group:'):
@@ -68,16 +73,34 @@ if __name__ == '__main__':
         code, qualification, charAndName = segments
 
         # skip unqualified versions of same unicode
-        if qualification == 'unqualified':
+        if qualification != 'fully-qualified':
             continue
 
-        if qualification == 'component':
-            continue
 
         char, name = re.match(r'^(\S+) E\d+\.\d+ (.*)$', charAndName).groups()
-
+        # Use the shortcode override if one exists; otherwise derive a shortcode from the name
+        
+        if name in shortcodeDict: 
+            name = shortcodeDict[name]
+        else: 
+            if name.endswith(' face'): 
+                name = name[:-5]
+            elif name.endswith(' button'): 
+                name = name[:-7]
+            else: 
+                matchobj = re.match(r'^flag: (.*)$', name) 
+                if matchobj: 
+                    country, = matchobj.groups() 
+                    name = country + " flag"
+            name = name.replace(" ", "_")
+            name = name.replace("“", "")
+            name = name.replace("”", "")
+            name = name.replace(":", "")
+            name = name.lower()
+            name = unidecode(name)
         categories[current_category].append(Emoji(code, name))
 
     # Use xclip to pipe the output to clipboard.
     # e.g ./codegen.py emoji.json | xclip -sel clip
+    # Alternatively, delete the emoji::Provider::emoji definition from src/emoji/Provider.cpp and run ./emoji_codegen.py emoji-test.txt shortcodes.txt >> src/emoji/Provider.cpp
     generate_qml_list(people=people, nature=nature, food=food, activity=activity, travel=travel, objects=objects, symbols=symbols, flags=flags)
diff --git a/scripts/update_emoji.md b/scripts/update_emoji.md
index 00fe8c4e..fae6d089 100644
--- a/scripts/update_emoji.md
+++ b/scripts/update_emoji.md
@@ -2,6 +2,8 @@
 
 1. Get the latest emoji-test.txt from here: https://unicode.org/Public/emoji/
 2. Overwrite the existing resources/emoji-test.txt with the new one
-3. Run `./scripts/emoji_codegen.py resources/emoji-test.txt` and replace the current tail of src/emoji/Provider.cpp with the new output
+3. Run `./scripts/emoji_codegen.py resources/emoji-test.txt resources/shortcodes.txt` and replace the current tail of src/emoji/Provider.cpp with the new output
 4. `make lint`
 5. Compile and test
+
+