3 files changed, 79 insertions, 15 deletions
diff --git a/scripts/codegen.sh b/scripts/codegen.sh
new file mode 100644
index 00000000..9ba5c5df
--- /dev/null
+++ b/scripts/codegen.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Regenerate src/emoji/Provider.cpp: header file first, then the emoji_codegen.py output.
+ROOT=$(realpath "$(dirname "$0")/..") && cd "$ROOT" || exit 1  # repo root, for relative or absolute $0
+cat resources/provider-header.txt > src/emoji/Provider.cpp
+cat resources/extra_emoji.txt resources/emoji-test.txt > resources/complete-emoji.txt
+scripts/emoji_codegen.py resources/complete-emoji.txt resources/shortcodes.txt >> src/emoji/Provider.cpp
+cd - > /dev/null
diff --git a/scripts/emoji_codegen.py b/scripts/emoji_codegen.py
index 700cc3e1..c52189dc 100755
--- a/scripts/emoji_codegen.py
+++ b/scripts/emoji_codegen.py
@@ -2,14 +2,15 @@
 
 import sys
 import re
-
+from unidecode import unidecode
 from jinja2 import Template
 
 
 class Emoji(object):
-    def __init__(self, code, shortname):
+    def __init__(self, code, shortname, unicodename):
         self.code = ''.join(['\\U'+c.rjust(8, '0') for c in code.strip().split(' ')])
         self.shortname = shortname
+        self.unicodename = unicodename
 
 def generate_qml_list(**kwargs):
     tmpl = Template('''
@@ -17,20 +18,20 @@ const QVector<Emoji> emoji::Provider::emoji = {
     {%- for c in kwargs.items() %}
     // {{ c[0].capitalize() }}
     {%- for e in c[1] %}
-    Emoji{QStringLiteral(u"{{ e.code }}"), QStringLiteral(u"{{ e.shortname }}"), emoji::Emoji::Category::{{ c[0].capitalize() }}},
+    Emoji{QStringLiteral(u"{{ e.code }}"), QStringLiteral(u"{{ e.shortname }}"), QStringLiteral(u"{{ e.unicodename }}"), emoji::Emoji::Category::{{ c[0].capitalize() }}},
     {%- endfor %}
     {%- endfor %}
 };
     ''')
     d = dict(kwargs=kwargs)
     print(tmpl.render(d))
-
 if __name__ == '__main__':
-    if len(sys.argv) < 2:
-        print('usage: emoji_codegen.py /path/to/emoji-test.txt')
+    if len(sys.argv) < 3:
+        print('usage: emoji_codegen.py /path/to/emoji-test.txt /path/to/shortcodes.txt')
         sys.exit(1)
 
     filename = sys.argv[1]
+    shortcodefilename = sys.argv[2]
 
     people = []
     nature = []
@@ -50,9 +51,14 @@ if __name__ == '__main__':
         'Activities': activity,
         'Objects': objects,
         'Symbols': symbols,
-        'Flags': flags
+        'Flags': flags,
+        'Component': symbols
     }
-
+    # Load the "<key>:<shortcode>" lines; strip() drops the newline, blank lines are skipped.
+    with open(shortcodefilename, 'r', encoding="utf8") as shortcodefile:
+        shortcodeDict = dict(
+            line.strip().split(':') for line in shortcodefile if line.strip()
+        )
     current_category = ''
     for line in open(filename, 'r', encoding="utf8"):
         if line.startswith('# group:'):
@@ -68,16 +74,65 @@ if __name__ == '__main__':
         code, qualification, charAndName = segments
 
         # skip unqualified versions of same unicode
-        if qualification == 'unqualified':
-            continue
-
-        if qualification == 'component':
+        if qualification != 'fully-qualified':
             continue
+        
 
         char, name = re.match(r'^(\S+) E\d+\.\d+ (.*)$', charAndName).groups()
-
-        categories[current_category].append(Emoji(code, name))
+        shortname = name
+        # Skin tone variants are kept for now, until they get dedicated handling.
+        # The disabled code below would instead discard the skin tone variants
+        # and every component that is not a skin tone.
+        # if name.__contains__("skin tone") and qualification != 'component':
+        #    continue
+        # if qualification == 'component' and not name.__contains__("skin tone"):
+        #    continue
+        # TODO: Handle skin tone modifiers in a sane way
+        basicallyTheSame = False  # set when the rewrites below leave the lower-cased name unchanged
+        if code in shortcodeDict:
+            shortname = shortcodeDict[code]
+        else:
+            # No explicit shortcode for this emoji: derive one from its Unicode name.
+            shortname = shortname.lower()
+            if shortname.endswith(' (blood type)'):
+                shortname = shortname[:-13]
+            if shortname.endswith(': red hair'):
+                shortname = "red_haired_" + shortname[:-10]
+            if shortname.endswith(': curly hair'):
+                shortname = "curly_haired_" + shortname[:-12]
+            if shortname.endswith(': white hair'):
+                shortname = "white_haired_" + shortname[:-12]
+            if shortname.endswith(': bald'):
+                shortname = "bald_" + shortname[:-6]
+            if shortname.endswith(': beard'):
+                shortname = "bearded_" + shortname[:-7]
+            if shortname.endswith(' face'):
+                shortname = shortname[:-5]
+            if shortname.endswith(' button'):
+                shortname = shortname[:-7]
+            if shortname.endswith(' banknote'):
+                shortname = shortname[:-9]
+            # Turn "flag: <country>" into "<country> flag".
+            if shortname.startswith("flag: "):
+                country = shortname[len("flag: "):]
+                shortname = country + " flag"
+            shortname = shortname.replace("u.s.", "us")
+            shortname = shortname.replace("&", "and")
+
+            if shortname == name.lower():
+                basicallyTheSame = True
+
+            # Make the result identifier-like: word characters joined by single underscores.
+            shortname = shortname.replace("-", "_")
+            shortname = re.sub(r'\W', '_', shortname)
+            shortname = re.sub(r'_{2,}', '_', shortname)
+            shortname = shortname.strip('_')  # the old greedy regex left trailing underscores in place
+            shortname = unidecode(shortname)
+        # if basicallyTheSame: 
+        #    shortname = ""
+        categories[current_category].append(Emoji(code, shortname, name))
 
     # Use xclip to pipe the output to clipboard.
     # e.g ./codegen.py emoji.json | xclip -sel clip
+    # Alternatively, delete the old `emoji::Provider::emoji` definition from src/emoji/Provider.cpp and run ./emoji_codegen.py <emoji-test.txt> <shortcodes.txt> >> src/emoji/Provider.cpp
     generate_qml_list(people=people, nature=nature, food=food, activity=activity, travel=travel, objects=objects, symbols=symbols, flags=flags)
diff --git a/scripts/update_emoji.md b/scripts/update_emoji.md
index 00fe8c4e..fae6d089 100644
--- a/scripts/update_emoji.md
+++ b/scripts/update_emoji.md
@@ -2,6 +2,8 @@
 
 1. Get the latest emoji-test.txt from here: https://unicode.org/Public/emoji/
 2. Overwrite the existing resources/emoji-test.txt with the new one
-3. Run `./scripts/emoji_codegen.py resources/emoji-test.txt` and replace the current tail of src/emoji/Provider.cpp with the new output
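+3. Run `./scripts/emoji_codegen.py resources/emoji-test.txt resources/shortcodes.txt` and replace the current tail of src/emoji/Provider.cpp with the new output (alternatively, `./scripts/codegen.sh` rewrites src/emoji/Provider.cpp in one step, also including resources/extra_emoji.txt)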
 4. `make lint`
 5. Compile and test
+
+