word.c - libgrapheme - unicode string library | |
git clone git://git.suckless.org/libgrapheme | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
word.c (3038B) | |
--- | |
1 /* See LICENSE file for copyright and license details. */ | |
2 #include <stdio.h> | |
3 #include <stdlib.h> | |
4 #include <string.h> | |
5 | |
6 #include "util.h" | |
7 | |
8 #define FILE_EMOJI "data/emoji-data.txt" | |
9 #define FILE_WORD "data/WordBreakProperty.txt" | |
10 | |
/*
 * Table of word-break properties handled by this program.
 *
 * Each entry carries three fields: enumname (the upper-case identifier),
 * file (FILE_WORD or FILE_EMOJI, i.e. which data file the property is
 * taken from) and ucdname (the spelling of the property inside that file).
 *
 * handle_conflict() receives and returns positions in this array, so the
 * position of an entry is the numeric value of its property. OTHER comes
 * first; the remaining entries are in alphabetical order of enumname.
 *
 * NOTE(review): how the generator consumes file/ucdname is defined outside
 * this file - confirm against properties_generate_break_property().
 */
11 static const struct property_spec word_break_property[] = { | |
/* No data file and no UCD name; presumably the fallback class - TODO confirm */
12 { | |
13 .enumname = "OTHER", | |
14 .file = NULL, | |
15 .ucdname = NULL, | |
16 }, | |
17 { | |
18 .enumname = "ALETTER", | |
19 .file = FILE_WORD, | |
20 .ucdname = "ALetter", | |
21 }, | |
/*
 * Synthetic class that is not read from any file: handle_conflict()
 * returns it for code points that are both ALETTER and
 * EXTENDED_PICTOGRAPHIC.
 */
22 { | |
23 .enumname = "BOTH_ALETTER_EXTPICT", | |
24 .file = NULL, | |
25 .ucdname = NULL, | |
26 }, | |
27 { | |
28 .enumname = "CR", | |
29 .file = FILE_WORD, | |
30 .ucdname = "CR", | |
31 }, | |
32 { | |
33 .enumname = "DOUBLE_QUOTE", | |
34 .file = FILE_WORD, | |
35 .ucdname = "Double_Quote", | |
36 }, | |
37 { | |
38 .enumname = "EXTEND", | |
39 .file = FILE_WORD, | |
40 .ucdname = "Extend", | |
41 }, | |
/* The only entry sourced from FILE_EMOJI instead of FILE_WORD */
42 { | |
43 .enumname = "EXTENDED_PICTOGRAPHIC", | |
44 .file = FILE_EMOJI, | |
45 .ucdname = "Extended_Pictographic", | |
46 }, | |
47 { | |
48 .enumname = "EXTENDNUMLET", | |
49 .file = FILE_WORD, | |
50 .ucdname = "ExtendNumLet", | |
51 }, | |
52 { | |
53 .enumname = "FORMAT", | |
54 .file = FILE_WORD, | |
55 .ucdname = "Format", | |
56 }, | |
57 { | |
58 .enumname = "HEBREW_LETTER", | |
59 .file = FILE_WORD, | |
60 .ucdname = "Hebrew_Letter", | |
61 }, | |
62 { | |
63 .enumname = "KATAKANA", | |
64 .file = FILE_WORD, | |
65 .ucdname = "Katakana", | |
66 }, | |
67 { | |
68 .enumname = "LF", | |
69 .file = FILE_WORD, | |
70 .ucdname = "LF", | |
71 }, | |
72 { | |
73 .enumname = "MIDLETTER", | |
74 .file = FILE_WORD, | |
75 .ucdname = "MidLetter", | |
76 }, | |
77 { | |
78 .enumname = "MIDNUM", | |
79 .file = FILE_WORD, | |
80 .ucdname = "MidNum", | |
81 }, | |
82 { | |
83 .enumname = "MIDNUMLET", | |
84 .file = FILE_WORD, | |
85 .ucdname = "MidNumLet", | |
86 }, | |
87 { | |
88 .enumname = "NEWLINE", | |
89 .file = FILE_WORD, | |
90 .ucdname = "Newline", | |
91 }, | |
92 { | |
93 .enumname = "NUMERIC", | |
94 .file = FILE_WORD, | |
95 .ucdname = "Numeric", | |
96 }, | |
97 { | |
98 .enumname = "REGIONAL_INDICATOR", | |
99 .file = FILE_WORD, | |
100 .ucdname = "Regional_Indicator", | |
101 }, | |
102 { | |
103 .enumname = "SINGLE_QUOTE", | |
104 .file = FILE_WORD, | |
105 .ucdname = "Single_Quote", | |
106 }, | |
107 { | |
108 .enumname = "WSEGSPACE", | |
109 .file = FILE_WORD, | |
110 .ucdname = "WSegSpace", | |
111 }, | |
112 { | |
113 .enumname = "ZWJ", | |
114 .file = FILE_WORD, | |
115 .ucdname = "ZWJ", | |
116 }, | |
117 }; | |
118 | |
/*
 * Resolve the case where one code point carries two different word-break
 * properties.
 *
 * cp    - the affected code point; unused here (explicitly voided).
 * prop1 - position of the first property in word_break_property.
 * prop2 - position of the second property in word_break_property.
 *
 * Returns the position of the BOTH_ALETTER_EXTPICT entry when the two
 * properties are ALETTER and EXTENDED_PICTOGRAPHIC, in either order.
 * Every other pair, and a table without a BOTH_ALETTER_EXTPICT entry,
 * writes a message to stderr and terminates the process with exit(1),
 * so the function never returns an unset result.
 *
 * NOTE(review): the end of the signature, of the for-header and of both
 * messages is cut off in this capture; the third parameter is presumably
 * prop2, since that is the name the body uses - confirm against upstream.
 * NOTE(review): main() hands this function to
 * properties_generate_break_property(); when exactly it is invoked is
 * decided there - confirm.
 */
119 static uint_least8_t | |
120 handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t pr… | |
121 { | |
122 uint_least8_t result; | |
123 | |
124 (void)cp; | |
125 | |
/*
 * Properties are identified by enumname via strcmp() instead of by
 * hard-coded positions, so the check stays valid if the table is
 * reordered. Both argument orders are accepted.
 */
126 if ((!strcmp(word_break_property[prop1].enumname, "ALETTER") && | |
127 !strcmp(word_break_property[prop2].enumname, | |
128 "EXTENDED_PICTOGRAPHIC")) || | |
129 (!strcmp(word_break_property[prop1].enumname, | |
130 "EXTENDED_PICTOGRAPHIC") && | |
131 !strcmp(word_break_property[prop2].enumname, "ALETTER"))) { | |
/* Linear search for the combined entry; result stops at its position */
132 for (result = 0; result < LEN(word_break_property); resu… | |
133 if (!strcmp(word_break_property[result].enumname, | |
134 "BOTH_ALETTER_EXTPICT")) { | |
135 break; | |
136 } | |
137 } | |
/* Loop ran to the end: the table has no combined entry */
138 if (result == LEN(word_break_property)) { | |
139 fprintf(stderr, "handle_conflict: Internal error… | |
140 exit(1); | |
141 } | |
142 } else { | |
143 fprintf(stderr, "handle_conflict: Cannot handle conflict… | |
144 exit(1); | |
145 } | |
146 | |
147 return result; | |
148 } | |
149 | |
/*
 * Program entry point.
 *
 * Passes the word-break table, its length, the conflict handler, the
 * name word_break and argv[0] to properties_generate_break_property()
 * and then returns 0. argc is unused (explicitly voided).
 *
 * NOTE(review): the meaning of the two NULL arguments and what the
 * generator does with word_break and argv[0] is defined outside this
 * file - confirm against the declaration, presumably in util.h.
 */
150 int | |
151 main(int argc, char *argv[]) | |
152 { | |
153 (void)argc; | |
154 | |
155 properties_generate_break_property( | |
156 word_break_property, LEN(word_break_property), NULL, | |
157 handle_conflict, NULL, "word_break", argv[0]); | |
158 | |
159 return 0; | |
160 } | |