Skip to content

Commit 47602df

Browse files
committed
binmoji: add to project
This is needed for the metadata table. Signed-off-by: William Casarin <[email protected]>
1 parent 14d7f2b commit 47602df

File tree

4 files changed

+482
-1
lines changed

4 files changed

+482
-1
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ CCAN_HDRS := ccan/ccan/utf8/utf8.h ccan/ccan/container_of/container_of.h ccan/cc
55
HEADERS = deps/lmdb/lmdb.h deps/secp256k1/include/secp256k1.h src/nostrdb.h src/cursor.h src/hex.h src/jsmn.h src/config.h src/random.h src/memchr.h src/cpu.h src/nostr_bech32.h src/block.h src/str_block.h $(C_BINDINGS) $(CCAN_HDRS) $(BOLT11_HDRS)
66
FLATCC_SRCS=deps/flatcc/src/runtime/json_parser.c deps/flatcc/src/runtime/verifier.c deps/flatcc/src/runtime/builder.c deps/flatcc/src/runtime/emitter.c deps/flatcc/src/runtime/refmap.c
77
BOLT11_SRCS = src/bolt11/bolt11.c src/bolt11/bech32.c src/bolt11/amount.c src/bolt11/hash_u5.c
8-
SRCS = src/nostrdb.c src/invoice.c src/nostr_bech32.c src/content_parser.c src/block.c $(BOLT11_SRCS) $(FLATCC_SRCS) $(CCAN_SRCS)
8+
SRCS = src/nostrdb.c src/invoice.c src/nostr_bech32.c src/content_parser.c src/block.c src/binmoji.c $(BOLT11_SRCS) $(FLATCC_SRCS) $(CCAN_SRCS)
99
LDS = $(OBJS) $(ARS)
1010
OBJS = $(SRCS:.c=.o)
1111
DEPS = $(OBJS) $(HEADERS) $(ARS)

src/binmoji.c

Lines changed: 278 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,278 @@
1+
#include <assert.h>
2+
#include <stddef.h>
3+
#include <stdint.h>
4+
#include <stdio.h>
5+
#include <stdlib.h>
6+
#include <string.h>
7+
8+
#include "binmoji.h"
9+
10+
/*
 * Bit layout of the 64-bit id produced by binmoji_encode() and unpacked by
 * binmoji_decode(). Each field is described by a shift (position of its
 * least significant bit) and a mask (applied before/after shifting):
 *
 *   bits 63..42  primary code point  (22 bits)
 *   bits 41..10  component hash      (32 bits)
 *   bits  9..7   skin tone 1         (3 bits)
 *   bits  6..4   skin tone 2         (3 bits)
 *   bits  3..0   flags               (4 bits)
 */

/* primary code point */
#define PRIMARY_CP_SHIFT 42
#define PRIMARY_CP_MASK  0x3FFFFF

/* hash of the component list */
#define HASH_SHIFT       10
#define HASH_MASK        0xFFFFFFFF

/* skin tones share one mask */
#define TONE1_SHIFT      7
#define TONE2_SHIFT      4
#define TONE_MASK        0x7

/* flags */
#define FLAGS_SHIFT      0
#define FLAGS_MASK       0xF
20+
21+
/*
 * Element type used when binmoji_table is searched with bsearch(): it pairs
 * a component hash with the component code points stored for that hash.
 */
typedef struct {
	uint32_t hash;           /* key compared by compare_emoji_hash() */
	size_t count;            /* how many components[] entries are copied out on a hit */
	uint32_t components[16]; /* component code points */
} EmojiHashEntry;
26+
27+
#include "binmoji_table.h"
28+
29+
/* Number of elements in binmoji_table; passed to bsearch() as the element count. */
const size_t num_hash_entries = sizeof(binmoji_table) / sizeof(*binmoji_table);
31+
32+
/**
 * @brief Bitwise CRC over an array of 32-bit words.
 *
 * Every word is fed most-significant bit first into a 32-bit shift register
 * that starts at 0xFFFFFFFF and uses the polynomial 0x04C11DB7. The register
 * is returned as-is (no final inversion is applied).
 *
 * @param data   Words to hash.
 * @param length Number of words in @p data.
 * @return The register value after all words, or 0 when @p data is NULL or
 *         @p length is 0.
 */
static uint32_t crc32(const uint32_t *data, size_t length)
{
	const uint32_t poly = 0x04C11DB7;
	const uint32_t *end;
	uint32_t reg = 0xFFFFFFFF;

	if (!data || !length)
		return 0;

	for (end = data + length; data != end; ++data) {
		uint32_t mask;

		/* walk the word from bit 31 down to bit 0 */
		for (mask = 0x80000000u; mask != 0; mask >>= 1) {
			const uint32_t in = (*data & mask) != 0;
			const uint32_t top = reg >> 31;

			reg <<= 1;
			if (top != in)
				reg ^= poly;
		}
	}
	return reg;
}
53+
54+
/**
 * @brief Tell whether a code point is something other than a modifier/joiner.
 *
 * @param codepoint Unicode code point to classify.
 * @return 0 for the skin tone modifiers U+1F3FB..U+1F3FF and for the zero
 *         width joiner U+200D, 1 for every other value.
 */
static int is_base_emoji(uint32_t codepoint)
{
	const int is_skin_tone = codepoint >= 0x1F3FB && codepoint <= 0x1F3FF;
	const int is_zwj = codepoint == 0x200D;

	return !(is_skin_tone || is_zwj);
}
62+
63+
void binmoji_parse(const char *emoji_str, struct binmoji *binmoji)
64+
{
65+
const unsigned char *s;
66+
memset(binmoji, 0, sizeof(struct binmoji));
67+
s = (const unsigned char *)emoji_str;
68+
69+
while (*s) {
70+
uint32_t codepoint = 0;
71+
int len = 0;
72+
if (*s < 0x80) {
73+
len = 1;
74+
codepoint = s[0];
75+
} else if ((*s & 0xE0) == 0xC0) {
76+
len = 2;
77+
codepoint = ((s[0] & 0x1F) << 6) | (s[1] & 0x3F);
78+
} else if ((*s & 0xF0) == 0xE0) {
79+
len = 3;
80+
codepoint = ((s[0] & 0x0F) << 12) |
81+
((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
82+
} else if ((*s & 0xF8) == 0xF0) {
83+
len = 4;
84+
codepoint = ((s[0] & 0x07) << 18) |
85+
((s[1] & 0x3F) << 12) |
86+
((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
87+
} else {
88+
s++;
89+
continue;
90+
}
91+
s += len;
92+
93+
if (codepoint >= 0x1F3FB && codepoint <= 0x1F3FF) {
94+
uint8_t tone_val = (codepoint - 0x1F3FB) + 1;
95+
if (binmoji->skin_tone1 == 0)
96+
binmoji->skin_tone1 = tone_val;
97+
else if (binmoji->skin_tone2 == 0)
98+
binmoji->skin_tone2 = tone_val;
99+
} else if (is_base_emoji(codepoint)) {
100+
if (binmoji->primary_codepoint == 0) {
101+
binmoji->primary_codepoint = codepoint;
102+
} else if (binmoji->component_count < 16) {
103+
binmoji->component_list
104+
[binmoji->component_count++] = codepoint;
105+
}
106+
}
107+
}
108+
binmoji->component_hash =
109+
crc32(binmoji->component_list, binmoji->component_count);
110+
}
111+
112+
uint64_t binmoji_encode(const struct binmoji *binmoji)
113+
{
114+
uint64_t id = 0;
115+
id |= ((uint64_t)(binmoji->primary_codepoint & PRIMARY_CP_MASK)
116+
<< PRIMARY_CP_SHIFT);
117+
id |= ((uint64_t)(binmoji->component_hash & HASH_MASK) << HASH_SHIFT);
118+
id |= ((uint64_t)(binmoji->skin_tone1 & TONE_MASK) << TONE1_SHIFT);
119+
id |= ((uint64_t)(binmoji->skin_tone2 & TONE_MASK) << TONE2_SHIFT);
120+
id |= ((uint64_t)(binmoji->flags & FLAGS_MASK) << FLAGS_SHIFT);
121+
return id;
122+
}
123+
124+
/**
125+
* @brief Comparison function for bsearch.
126+
*
127+
* Compares a target hash key against an EmojiHashEntry's hash.
128+
* @param key Pointer to the target uint32_t hash.
129+
* @param element Pointer to the EmojiHashEntry from the array.
130+
* @return <0 if key is less than element's hash, 0 if equal, >0 if greater.
131+
*/
132+
static int compare_emoji_hash(const void *key, const void *element)
133+
{
134+
const uint32_t hash_key = *(const uint32_t *)key;
135+
const EmojiHashEntry *entry = (const EmojiHashEntry *)element;
136+
137+
if (hash_key < entry->hash) {
138+
return -1;
139+
} else if (hash_key > entry->hash) {
140+
return 1;
141+
} else {
142+
return 0;
143+
}
144+
}
145+
146+
/**
147+
* @brief Optimized lookup using binary search.
148+
*/
149+
static int lookup_binmoji_by_hash(uint32_t hash, uint32_t *out_binmoji,
150+
size_t *out_count)
151+
{
152+
const EmojiHashEntry *result =
153+
bsearch(&hash, binmoji_table, num_hash_entries,
154+
sizeof(EmojiHashEntry), compare_emoji_hash);
155+
156+
if (result != NULL) {
157+
*out_count = result->count;
158+
memcpy(out_binmoji, result->components,
159+
(*out_count) * sizeof(uint32_t));
160+
return 1; /* Found */
161+
}
162+
163+
*out_count = 0;
164+
return 0; /* Not found */
165+
}
166+
167+
void binmoji_decode(uint64_t id, struct binmoji *binmoji)
168+
{
169+
memset(binmoji, 0, sizeof(struct binmoji));
170+
binmoji->primary_codepoint = (id >> PRIMARY_CP_SHIFT) & PRIMARY_CP_MASK;
171+
binmoji->component_hash = (id >> HASH_SHIFT) & HASH_MASK;
172+
binmoji->skin_tone1 = (id >> TONE1_SHIFT) & TONE_MASK;
173+
binmoji->skin_tone2 = (id >> TONE2_SHIFT) & TONE_MASK;
174+
binmoji->flags = (id >> FLAGS_SHIFT) & FLAGS_MASK;
175+
if (binmoji->component_hash != 0) {
176+
lookup_binmoji_by_hash(binmoji->component_hash,
177+
binmoji->component_list,
178+
&binmoji->component_count);
179+
}
180+
}
181+
182+
/**
 * @brief Append one code point, UTF-8 encoded, to a buffer.
 *
 * One byte of @p buf is always kept free after the written data so the
 * caller can NUL-terminate at *offset. Nothing is written and *offset is
 * left untouched when the function fails.
 *
 * @param buf       Destination buffer.
 * @param buf_size  Total size of @p buf in bytes.
 * @param offset    In/out write position within @p buf; advanced by the
 *                  number of bytes written.
 * @param codepoint Unicode scalar value to encode.
 * @return Number of bytes written (1..4), or 0 if @p buf or @p offset is
 *         NULL, the code point is a UTF-16 surrogate (U+D800..U+DFFF) or
 *         above U+10FFFF, or the encoded bytes plus a terminator do not fit.
 */
static int append_utf8(char *buf, size_t buf_size, size_t *offset,
		       uint32_t codepoint)
{
	unsigned char *p;
	size_t bytes_to_write;

	if (!buf || !offset)
		return 0;

	/* surrogates are not scalar values; encoding them is ill-formed */
	if (codepoint >= 0xD800 && codepoint <= 0xDFFF)
		return 0;

	if (codepoint < 0x80)
		bytes_to_write = 1;
	else if (codepoint < 0x800)
		bytes_to_write = 2;
	else if (codepoint < 0x10000)
		bytes_to_write = 3;
	else if (codepoint < 0x110000)
		bytes_to_write = 4;
	else
		return 0;

	/*
	 * Same condition as "*offset + bytes_to_write >= buf_size", written
	 * as a subtraction so that a large *offset cannot wrap around.
	 */
	if (*offset >= buf_size || buf_size - *offset <= bytes_to_write)
		return 0;

	/* write through unsigned char: the bytes are >= 0x80 */
	p = (unsigned char *)buf + *offset;
	switch (bytes_to_write) {
	case 1:
		p[0] = (unsigned char)codepoint;
		break;
	case 2:
		p[0] = (unsigned char)(0xC0 | (codepoint >> 6));
		p[1] = (unsigned char)(0x80 | (codepoint & 0x3F));
		break;
	case 3:
		p[0] = (unsigned char)(0xE0 | (codepoint >> 12));
		p[1] = (unsigned char)(0x80 | ((codepoint >> 6) & 0x3F));
		p[2] = (unsigned char)(0x80 | (codepoint & 0x3F));
		break;
	default:
		p[0] = (unsigned char)(0xF0 | (codepoint >> 18));
		p[1] = (unsigned char)(0x80 | ((codepoint >> 12) & 0x3F));
		p[2] = (unsigned char)(0x80 | ((codepoint >> 6) & 0x3F));
		p[3] = (unsigned char)(0x80 | (codepoint & 0x3F));
		break;
	}

	*offset += bytes_to_write;
	return (int)bytes_to_write;
}
222+
223+
void binmoji_to_string(const struct binmoji *binmoji, char *out_str,
224+
size_t out_str_size)
225+
{
226+
size_t i, offset;
227+
uint32_t comp;
228+
int needs_zwj, is_country_flag, is_subdivision_flag, no_zwj_sequence;
229+
230+
if (!binmoji || !out_str || out_str_size == 0)
231+
return;
232+
233+
offset = 0;
234+
out_str[0] = '\0';
235+
236+
is_country_flag = (binmoji->primary_codepoint >= 0x1F1E6 &&
237+
binmoji->primary_codepoint <= 0x1F1FF);
238+
239+
is_subdivision_flag = (binmoji->primary_codepoint == 0x1F3F4 &&
240+
binmoji->component_count > 0 &&
241+
binmoji->component_list[0] >= 0xE0020 &&
242+
binmoji->component_list[0] <= 0xE007F);
243+
244+
no_zwj_sequence = is_country_flag || is_subdivision_flag;
245+
246+
if (binmoji->primary_codepoint > 0) {
247+
append_utf8(out_str, out_str_size, &offset,
248+
binmoji->primary_codepoint);
249+
}
250+
251+
if (binmoji->skin_tone1 > 0) {
252+
append_utf8(out_str, out_str_size, &offset,
253+
0x1F3FB + binmoji->skin_tone1 - 1);
254+
}
255+
256+
for (i = 0; i < binmoji->component_count; i++) {
257+
comp = binmoji->component_list[i];
258+
needs_zwj =
259+
(comp != 0xFE0F && comp != 0x20E3 && !no_zwj_sequence);
260+
261+
if (needs_zwj) {
262+
append_utf8(out_str, out_str_size, &offset,
263+
0x200D); /* ZWJ */
264+
}
265+
append_utf8(out_str, out_str_size, &offset, comp);
266+
267+
if (i == binmoji->component_count - 1 &&
268+
binmoji->skin_tone2 > 0) {
269+
append_utf8(out_str, out_str_size, &offset,
270+
0x1F3FB + binmoji->skin_tone2 - 1);
271+
}
272+
}
273+
274+
if (offset < out_str_size)
275+
out_str[offset] = '\0';
276+
else if (out_str_size > 0)
277+
out_str[out_str_size - 1] = '\0';
278+
}

src/binmoji.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
2+
#ifndef BINMOJI_H
3+
#define BINMOJI_H
4+
5+
#include <stdint.h>
6+
#include <stdlib.h>
7+
8+
/* Decomposed form of one emoji sequence, as filled in by binmoji_parse() and binmoji_decode(). */
struct binmoji {
	uint32_t primary_codepoint;  /* first code point that is neither a skin tone nor a ZWJ; 0 if none */
	uint32_t component_list[16]; /* remaining non-tone, non-ZWJ code points, in order */
	size_t component_count;      /* number of used entries in component_list */
	uint32_t component_hash;     /* hash of the used part of component_list; 0 when it is empty */
	uint8_t skin_tone1;          /* 0 = none, 1..5 = U+1F3FB..U+1F3FF */
	uint8_t skin_tone2;          /* second skin tone, same encoding as skin_tone1 */
	uint8_t flags;               /* only the low 4 bits are kept by binmoji_encode() */
};
17+
18+
/* Bit 3 of an encoded id; set/cleared by binmoji_set_user_flag() and tested by binmoji_get_user_flag(). */
static const uint64_t USER_FLAG_MASK = 1 << 3;
19+
20+
/*
 * Write the UTF-8 form of *binmoji into out_str (out_str_size bytes,
 * terminator included). The result is NUL-terminated; code points that do
 * not fit are not written. Does nothing if a pointer is NULL or
 * out_str_size is 0.
 */
void binmoji_to_string(const struct binmoji *binmoji, char *out_str, size_t out_str_size);
21+
/*
 * Unpack a 64-bit id into *binmoji, which is fully overwritten. When the
 * stored component hash is non-zero the component list is looked up by that
 * hash; it stays empty if the hash is unknown.
 */
void binmoji_decode(uint64_t id, struct binmoji *binmoji);
22+
/*
 * Parse the NUL-terminated UTF-8 string `emoji` into *binmoji, which is
 * fully overwritten: primary code point, up to two skin tones, up to 16
 * further components and the hash of those components.
 */
void binmoji_parse(const char *emoji, struct binmoji *binmoji);
23+
/*
 * Pack *binmoji into a 64-bit id: primary code point, component hash, both
 * skin tones and flags. The component list itself is not stored, only its
 * hash.
 */
uint64_t binmoji_encode(const struct binmoji *binmoji);
24+
25+
/* some user flag helpers */
26+
static __inline uint64_t binmoji_set_user_flag(uint64_t binmoji, uint8_t enable) {
27+
return enable ? (binmoji | USER_FLAG_MASK) : (binmoji & ~USER_FLAG_MASK);
28+
}
29+
30+
static __inline uint8_t binmoji_get_user_flag(uint64_t binmoji) {
31+
return (binmoji & USER_FLAG_MASK) == USER_FLAG_MASK;
32+
}
33+
34+
#endif /* BINMOJI_H */

0 commit comments

Comments
 (0)