Introduction
Introduction Statistics Contact Development Disclaimer Help
case.c - libgrapheme - unicode string library
git clone git://git.suckless.org/libgrapheme
Log
Files
Refs
README
LICENSE
---
case.c (8442B)
---
1 /* See LICENSE file for copyright and license details. */
2 #include <errno.h>
3 #include <stdint.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7
8 #include "util.h"
9
10 #define FILE_DCP "data/DerivedCoreProperties.txt"
11
12 static const struct property_spec case_property[] = {
13 {
14 .enumname = "OTHER",
15 .file = NULL,
16 .ucdname = NULL,
17 },
18 {
19 .enumname = "BOTH_CASED_CASE_IGNORABLE",
20 .file = NULL,
21 .ucdname = NULL,
22 },
23 {
24 .enumname = "CASED",
25 .file = FILE_DCP,
26 .ucdname = "Cased",
27 },
28 {
29 .enumname = "CASE_IGNORABLE",
30 .file = FILE_DCP,
31 .ucdname = "Case_Ignorable",
32 },
33 {
34 .enumname = "UNCASED",
35 .file = FILE_DCP,
36 .ucdname = "Uncased",
37 },
38 };
39
/*
 * Conflict handler passed to properties_generate_break_property() in main().
 *
 * prop1 and prop2 are used as indices into case_property. If one of them
 * names CASED and the other names CASE_IGNORABLE (in either order), the
 * index of the BOTH_CASED_CASE_IGNORABLE entry is looked up by name and
 * returned. If that entry cannot be found, or if the pair is any other
 * combination, a message is written to stderr and the program exits with
 * status 1.
 *
 * cp is accepted but unused.
 *
 * NOTE(review): the signature line and both message strings are cut off
 * in this listing (they end in an ellipsis); the missing text is not
 * described here.
 */
40 static uint_least8_t
41 handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t pr…
42 {
43 uint_least8_t result;
44
45 (void)cp;
46
47 if ((!strcmp(case_property[prop1].enumname, "CASED") &&
48 !strcmp(case_property[prop2].enumname, "CASE_IGNORABLE")) ||
49 (!strcmp(case_property[prop1].enumname, "CASE_IGNORABLE") &&
50 !strcmp(case_property[prop2].enumname, "CASED"))) {
51 for (result = 0; result < LEN(case_property); result++) {
52 if (!strcmp(case_property[result].enumname,
53 "BOTH_CASED_CASE_IGNORABLE")) {
54 break;
55 }
56 }
57 if (result == LEN(case_property)) {
58 fprintf(stderr, "handle_conflict: Internal error…
59 exit(1);
60 }
61 } else {
62 fprintf(stderr, "handle_conflict: Cannot handle conflict…
63 exit(1);
64 }
65
66 return result;
67 }
68
/*
 * Per-codepoint tables for the upper-, lower- and titlecase mappings.
 * They start out as null pointers and are allocated in main().
 */
static struct properties *prop_upper = NULL;
static struct properties *prop_lower = NULL;
static struct properties *prop_title = NULL;

/*
 * One special-case record: for each of the three case forms a codepoint
 * array (cp) together with its length (cplen).
 */
struct special_case {
	struct {
		uint_least32_t *cp;
		size_t cplen;
	} upper, lower, title;
};

/* growable array of special-case records and its current length */
static struct special_case *sc = NULL;
static size_t sclen = 0;
79
/*
 * Callback given to parse_file_with_callback() for data/UnicodeData.txt.
 *
 * field[0] is parsed as the hexadecimal codepoint cp. The upper, lower and
 * title values default to cp and are replaced by the parsed contents of
 * field[12], field[13] and field[14] respectively when those fields are
 * non-empty (field[14] is only looked at when nfields >= 15). If any of
 * these three hextocp() calls returns nonzero, the callback returns 1.
 *
 * On success the property member of prop_upper[cp], prop_lower[cp] and
 * prop_title[cp] is set to a signed difference starting with the mapped
 * value, and 0 is returned. The comment in main() describes the stored
 * value as the offset from the base character to its case mapping.
 *
 * file, comment and payload are unused.
 *
 * NOTE(review): the return value of the hextocp() call for field[0] is
 * not checked, and field[12] and field[13] are read without a check on
 * nfields - confirm the parser guarantees both.
 * NOTE(review): the right-hand sides of the three assignments are cut off
 * in this listing (they end in an ellipsis); the subtrahend is presumably
 * cp - confirm against the full source.
 */
80 static int
81 unicodedata_callback(const char *file, char **field, size_t nfields,
82 char *comment, void *payload)
83 {
84 uint_least32_t cp, upper, lower, title;
85
86 (void)file;
87 (void)comment;
88 (void)payload;
89
90 hextocp(field[0], strlen(field[0]), &cp);
91
92 upper = lower = title = cp;
93
94 if ((strlen(field[12]) > 0 &&
95 hextocp(field[12], strlen(field[12]), &upper)) ||
96 (strlen(field[13]) > 0 &&
97 hextocp(field[13], strlen(field[13]), &lower)) ||
98 (nfields >= 15 && strlen(field[14]) > 0 &&
99 hextocp(field[14], strlen(field[14]), &title))) {
100 return 1;
101 }
102
103 prop_upper[cp].property = (int_least32_t)upper - (int_least32_t)…
104 prop_lower[cp].property = (int_least32_t)lower - (int_least32_t)…
105 prop_title[cp].property = (int_least32_t)title - (int_least32_t)…
106
107 return 0;
108 }
109
110 static int
111 specialcasing_callback(const char *file, char **field, size_t nfields,
112 char *comment, void *payload)
113 {
114 uint_least32_t cp;
115
116 (void)file;
117 (void)comment;
118 (void)payload;
119
120 if (nfields > 4 && strlen(field[4]) > 0) {
121 /*
122 * we have more than 4 fields, i.e. the rule has a
123 * condition (language-sensitive, etc.) and is discarded
124 */
125 return 0;
126 }
127
128 /* parse affected codepoint */
129 hextocp(field[0], strlen(field[0]), &cp);
130
131 /* extend special case array */
132 if (!(sc = realloc(sc, (++sclen) * sizeof(*sc)))) {
133 fprintf(stderr, "realloc: %s\n", strerror(errno));
134 exit(1);
135 }
136
137 /* parse field data */
138 parse_cp_list(field[3], &(sc[sclen - 1].upper.cp),
139 &(sc[sclen - 1].upper.cplen));
140 parse_cp_list(field[1], &(sc[sclen - 1].lower.cp),
141 &(sc[sclen - 1].lower.cplen));
142 parse_cp_list(field[2], &(sc[sclen - 1].title.cp),
143 &(sc[sclen - 1].title.cplen));
144
145 /*
146 * overwrite value in "single mapping" property table by the
147 * special value 0x110000 + (offset in special case array),
148 * even if the special case has length 1
149 */
150 prop_upper[cp].property =
151 (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
152 prop_lower[cp].property =
153 (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
154 prop_title[cp].property =
155 (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
156
157 return 0;
158 }
159
160 static int_least64_t
161 get_value(const struct properties *prop, size_t offset)
162 {
163 return prop[offset].property;
164 }
165
/*
 * Entry point of the case table generator.
 *
 * Steps, in order:
 *  - generate the case property table from case_property through
 *    properties_generate_break_property(), with handle_conflict as the
 *    conflict handler
 *  - calloc the three per-codepoint arrays prop_upper, prop_lower and
 *    prop_title with 0x110000 entries each; on failure the error is
 *    printed to stderr and the program exits with status 1
 *  - fill them by parsing data/UnicodeData.txt and then
 *    data/SpecialCasing.txt with the two callbacks defined in this file
 *  - compress each array and print the compression ratios to stderr
 *  - print generated C source to stdout: a header with includes, the
 *    definition of struct special_case, the major and minor lookup
 *    tables for upper, lower and title, and the arrays upper_special,
 *    lower_special and title_special built from the sc records
 *  - free the compressed and major/minor buffers and return 0
 *
 * argc is unused; argv[0] is used as the program name in the output.
 *
 * NOTE(review): prop_upper, prop_lower, prop_title and sc are not freed
 * in this function; presumably that is left to process exit.
 * NOTE(review): several lines of this listing are cut off (they end in
 * an ellipsis); arguments beyond the cut are not described here.
 */
166 int
167 main(int argc, char *argv[])
168 {
169 struct properties_compressed comp_upper, comp_lower, comp_title;
170 struct properties_major_minor mm_upper, mm_lower, mm_title;
171 size_t i, j;
172
173 (void)argc;
174
175 /* generate case property table from the specification */
176 properties_generate_break_property(case_property, LEN(case_prope…
177 NULL, handle_conflict, NULL, …
178 argv[0]);
179
180 /*
181 * allocate property buffers for all 0x110000 codepoints
182 *
183 * the buffers contain the offset from the "base" character
184 * to the respective case mapping. By callocing we set all fields
185 * to zero, which is also the Unicode "default" in the sense that
186 * there is no case mapping by default (unless we fill it in)
187 */
188 if (!(prop_upper = calloc(UINT32_C(0x110000), sizeof(*prop_upper…
189 !(prop_lower = calloc(UINT32_C(0x110000), sizeof(*prop_lower…
190 !(prop_title = calloc(UINT32_C(0x110000), sizeof(*prop_title…
191 fprintf(stderr, "calloc: %s\n", strerror(errno));
192 exit(1);
193 }
194 parse_file_with_callback("data/UnicodeData.txt", unicodedata_cal…
195 NULL);
196 parse_file_with_callback("data/SpecialCasing.txt",
197 specialcasing_callback, NULL);
198
199 /* compress properties */
200 properties_compress(prop_upper, &comp_upper);
201 properties_compress(prop_lower, &comp_lower);
202 properties_compress(prop_title, &comp_title);
203
204 fprintf(stderr,
205 "%s: LUT compression-ratios: upper=%.2f%%, lower=%.2f%%,…
206 "title=%.2f%%\n",
207 argv[0], properties_get_major_minor(&comp_upper, &mm_upp…
208 properties_get_major_minor(&comp_lower, &mm_lower),
209 properties_get_major_minor(&comp_title, &mm_title));
210
211 /* print tables */
212 printf("/* Automatically generated by %s */\n#include "
213 "<stdint.h>\n#include <stddef.h>\n\n",
214 argv[0]);
215
216 printf("struct special_case {\n\tuint_least32_t *cp;\n\tsize_t "
217 "cplen;\n};\n\n");
218
219 properties_print_lookup_table("upper_major", mm_upper.major, 0x1…
220 printf("\n");
221 properties_print_derived_lookup_table("upper_minor", mm_upper.mi…
222 mm_upper.minorlen, get_val…
223 comp_upper.data);
224 printf("\n");
225 properties_print_lookup_table("lower_major", mm_lower.major, 0x1…
226 printf("\n");
227 properties_print_derived_lookup_table("lower_minor", mm_lower.mi…
228 mm_lower.minorlen, get_val…
229 comp_lower.data);
230 printf("\n");
231 properties_print_lookup_table("title_major", mm_title.major, 0x1…
232 printf("\n");
233 properties_print_derived_lookup_table("title_minor", mm_title.mi…
234 mm_title.minorlen, get_val…
235 comp_title.data);
236 printf("\n");
237
238 printf("static const struct special_case upper_special[] = {\n");
239 for (i = 0; i < sclen; i++) {
240 printf("\t{\n");
241
242 printf("\t\t.cp = (uint_least32_t[]){");
243 for (j = 0; j < sc[i].upper.cplen; j++) {
244 printf(" UINT32_C(0x%06X)", sc[i].upper.cp[j]);
245 if (j + 1 < sc[i].upper.cplen) {
246 putchar(',');
247 }
248 }
249 printf(" },\n");
250 printf("\t\t.cplen = %zu,\n", sc[i].upper.cplen);
251 printf("\t},\n");
252 }
253 printf("};\n\n");
254
255 printf("static const struct special_case lower_special[] = {\n");
256 for (i = 0; i < sclen; i++) {
257 printf("\t{\n");
258
259 printf("\t\t.cp = (uint_least32_t[]){");
260 for (j = 0; j < sc[i].lower.cplen; j++) {
261 printf(" UINT32_C(0x%06X)", sc[i].lower.cp[j]);
262 if (j + 1 < sc[i].lower.cplen) {
263 putchar(',');
264 }
265 }
266 printf(" },\n");
267 printf("\t\t.cplen = %zu,\n", sc[i].lower.cplen);
268 printf("\t},\n");
269 }
270 printf("};\n\n");
271
272 printf("static const struct special_case title_special[] = {\n");
273 for (i = 0; i < sclen; i++) {
274 printf("\t{\n");
275
276 printf("\t\t.cp = (uint_least32_t[]){");
277 for (j = 0; j < sc[i].title.cplen; j++) {
278 printf(" UINT32_C(0x%06X)", sc[i].title.cp[j]);
279 if (j + 1 < sc[i].title.cplen) {
280 putchar(',');
281 }
282 }
283 printf(" },\n");
284 printf("\t\t.cplen = %zu,\n", sc[i].title.cplen);
285 printf("\t},\n");
286 }
287 printf("};\n\n");
288
289 free(comp_lower.data);
290 free(comp_lower.offset);
291 free(comp_title.data);
292 free(comp_title.offset);
293 free(comp_upper.data);
294 free(comp_upper.offset);
295 free(mm_lower.major);
296 free(mm_lower.minor);
297 free(mm_title.major);
298 free(mm_title.minor);
299 free(mm_upper.major);
300 free(mm_upper.minor);
301
302 return 0;
303 }
You are viewing proxied material from suckless.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.