Introduction
Introduction Statistics Contact Development Disclaimer Help
line.c - libgrapheme - unicode string library
git clone git://git.suckless.org/libgrapheme
Log
Files
Refs
README
LICENSE
---
line.c (11273B)
---
1 /* See LICENSE file for copyright and license details. */
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5
6 #include "util.h"
7
8 #define FILE_EAW "data/EastAsianWidth.txt"
9 #define FILE_EMOJI "data/emoji-data.txt"
10 #define FILE_LINE "data/LineBreak.txt"
11
12 static const struct property_spec line_break_property[] = {
13 {
14 .enumname = "AL",
15 .file = FILE_LINE,
16 .ucdname = "AL",
17 },
18 /*
19 * Both extended pictographic and cn are large classes,
20 * but we are only interested in their intersection for LB30b,
21 * so we have the following two temporary classes. At first
22 * the extpict-class is filled, then the cn-class, which leads
23 * to conflicts (that we handle by putting them in the "proper"
24 * class BOTH_CN_EXTPICT). We make use of the fact that there
25 * is no intersection between AL and Cn.
26 *
27 * Any consecutive conflicts are permitted to overwrite
28 * TMP_EXTENDED_PICTOGRAPHIC and TMP_CN, because we don't need
29 * them, and in the final postprocessing we "reset" all
30 * remaining matches (that then didn't fit any of the other
31 * classes) to the generic class AL.
32 */
33 {
34 .enumname = "TMP_CN",
35 .file = FILE_LINE,
36 .ucdname = "Cn",
37 },
38 {
39 .enumname = "TMP_EXTENDED_PICTOGRAPHIC",
40 .file = FILE_EMOJI,
41 .ucdname = "Extended_Pictographic",
42 },
43 /* end of special block */
44 {
45 .enumname = "B2",
46 .file = FILE_LINE,
47 .ucdname = "B2",
48 },
49 {
50 .enumname = "BA",
51 .file = FILE_LINE,
52 .ucdname = "BA",
53 },
54 {
55 .enumname = "BB",
56 .file = FILE_LINE,
57 .ucdname = "BB",
58 },
59 {
60 .enumname = "BK",
61 .file = FILE_LINE,
62 .ucdname = "BK",
63 },
64 {
65 .enumname = "BOTH_CN_EXTPICT",
66 .file = NULL,
67 .ucdname = NULL,
68 },
69 {
70 .enumname = "CB",
71 .file = FILE_LINE,
72 .ucdname = "CB",
73 },
74 {
75 .enumname = "CL",
76 .file = FILE_LINE,
77 .ucdname = "CL",
78 },
79 {
80 .enumname = "CM",
81 .file = FILE_LINE,
82 .ucdname = "CM",
83 },
84 {
85 .enumname = "CP_WITHOUT_EAW_HWF",
86 .file = FILE_LINE,
87 .ucdname = "CP",
88 },
89 {
90 .enumname = "CP_WITH_EAW_HWF",
91 .file = NULL,
92 .ucdname = NULL,
93 },
94 {
95 .enumname = "CR",
96 .file = FILE_LINE,
97 .ucdname = "CR",
98 },
99 {
100 .enumname = "EB",
101 .file = FILE_LINE,
102 .ucdname = "EB",
103 },
104 {
105 .enumname = "EM",
106 .file = FILE_LINE,
107 .ucdname = "EM",
108 },
109 {
110 .enumname = "EX",
111 .file = FILE_LINE,
112 .ucdname = "EX",
113 },
114 {
115 .enumname = "GL",
116 .file = FILE_LINE,
117 .ucdname = "GL",
118 },
119 {
120 .enumname = "H2",
121 .file = FILE_LINE,
122 .ucdname = "H2",
123 },
124 {
125 .enumname = "H3",
126 .file = FILE_LINE,
127 .ucdname = "H3",
128 },
129 {
130 .enumname = "HL",
131 .file = FILE_LINE,
132 .ucdname = "HL",
133 },
134 {
135 .enumname = "HY",
136 .file = FILE_LINE,
137 .ucdname = "HY",
138 },
139 {
140 .enumname = "ID",
141 .file = FILE_LINE,
142 .ucdname = "ID",
143 },
144 {
145 .enumname = "IN",
146 .file = FILE_LINE,
147 .ucdname = "IN",
148 },
149 {
150 .enumname = "IS",
151 .file = FILE_LINE,
152 .ucdname = "IS",
153 },
154 {
155 .enumname = "JL",
156 .file = FILE_LINE,
157 .ucdname = "JL",
158 },
159 {
160 .enumname = "JT",
161 .file = FILE_LINE,
162 .ucdname = "JT",
163 },
164 {
165 .enumname = "JV",
166 .file = FILE_LINE,
167 .ucdname = "JV",
168 },
169 {
170 .enumname = "LF",
171 .file = FILE_LINE,
172 .ucdname = "LF",
173 },
174 {
175 .enumname = "NL",
176 .file = FILE_LINE,
177 .ucdname = "NL",
178 },
179 {
180 .enumname = "NS",
181 .file = FILE_LINE,
182 .ucdname = "NS",
183 },
184 {
185 .enumname = "NU",
186 .file = FILE_LINE,
187 .ucdname = "NU",
188 },
189 {
190 .enumname = "OP_WITHOUT_EAW_HWF",
191 .file = FILE_LINE,
192 .ucdname = "OP",
193 },
194 {
195 .enumname = "OP_WITH_EAW_HWF",
196 .file = NULL,
197 .ucdname = NULL,
198 },
199 {
200 .enumname = "PO",
201 .file = FILE_LINE,
202 .ucdname = "PO",
203 },
204 {
205 .enumname = "PR",
206 .file = FILE_LINE,
207 .ucdname = "PR",
208 },
209 {
210 .enumname = "QU",
211 .file = FILE_LINE,
212 .ucdname = "QU",
213 },
214 {
215 .enumname = "RI",
216 .file = FILE_LINE,
217 .ucdname = "RI",
218 },
219 {
220 .enumname = "SP",
221 .file = FILE_LINE,
222 .ucdname = "SP",
223 },
224 {
225 .enumname = "SY",
226 .file = FILE_LINE,
227 .ucdname = "SY",
228 },
229 {
230 .enumname = "WJ",
231 .file = FILE_LINE,
232 .ucdname = "WJ",
233 },
234 {
235 .enumname = "ZW",
236 .file = FILE_LINE,
237 .ucdname = "ZW",
238 },
239 {
240 .enumname = "ZWJ",
241 .file = FILE_LINE,
242 .ucdname = "ZWJ",
243 },
244 {
245 .enumname = "TMP_AI",
246 .file = FILE_LINE,
247 .ucdname = "AI",
248 },
249 {
250 .enumname = "TMP_CJ",
251 .file = FILE_LINE,
252 .ucdname = "CJ",
253 },
254 {
255 .enumname = "TMP_XX",
256 .file = NULL,
257 .ucdname = NULL,
258 },
259 {
260 .enumname = "TMP_MN",
261 .file = FILE_LINE,
262 .ucdname = "Mn",
263 },
264 {
265 .enumname = "TMP_MC",
266 .file = FILE_LINE,
267 .ucdname = "Mc",
268 },
269 {
270 .enumname = "TMP_SA_WITHOUT_MN_OR_MC",
271 .file = FILE_LINE,
272 .ucdname = "SA",
273 },
274 {
275 .enumname = "TMP_SA_WITH_MN_OR_MC",
276 .file = FILE_LINE,
277 .ucdname = "SA",
278 },
279 {
280 .enumname = "TMP_SG",
281 .file = FILE_LINE,
282 .ucdname = "SG",
283 },
284 {
285 .enumname = "TMP_EAW_H",
286 .file = FILE_EAW,
287 .ucdname = "H",
288 },
289 {
290 .enumname = "TMP_EAW_W",
291 .file = FILE_EAW,
292 .ucdname = "W",
293 },
294 {
295 .enumname = "TMP_EAW_F",
296 .file = FILE_EAW,
297 .ucdname = "F",
298 },
299 };
300
301 static uint_least8_t
302 handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t pr…
303 {
304 uint_least8_t result = prop2;
305 char *target = NULL;
306
307 (void)cp;
308
309 if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") ||
310 !strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") ||
311 !strcmp(line_break_property[prop1].enumname, "TMP_EAW_F")) …
312 (!strcmp(line_break_property[prop2].enumname, "TMP_EAW_H") ||
313 !strcmp(line_break_property[prop2].enumname, "TMP_EAW_W") ||
314 !strcmp(line_break_property[prop2].enumname, "TMP_EAW_F")))…
315 if (!strcmp(line_break_property[prop1].enumname,
316 "CP_WITHOUT_EAW_HWF") ||
317 !strcmp(line_break_property[prop2].enumname,
318 "CP_WITHOUT_EAW_HWF")) {
319 target = "CP_WITH_EAW_HWF";
320 } else if (!strcmp(line_break_property[prop1].enumname,
321 "OP_WITHOUT_EAW_HWF") ||
322 !strcmp(line_break_property[prop2].enumname,
323 "OP_WITHOUT_EAW_HWF")) {
324 target = "OP_WITH_EAW_HWF";
325 } else {
326 /* ignore EAW for the rest */
327 if ((!strcmp(line_break_property[prop1].enumname,
328 "TMP_EAW_H") ||
329 !strcmp(line_break_property[prop1].enumname,
330 "TMP_EAW_W") ||
331 !strcmp(line_break_property[prop1].enumname,
332 "TMP_EAW_F"))) {
333 result = prop2;
334 } else {
335 result = prop1;
336 }
337 }
338 } else if ((!strcmp(line_break_property[prop1].enumname, "TMP_MN…
339 !strcmp(line_break_property[prop1].enumname, "TMP_MC…
340 (!strcmp(line_break_property[prop2].enumname, "TMP_MN…
341 !strcmp(line_break_property[prop2].enumname, "TMP_MC…
342 if (!strcmp(line_break_property[prop1].enumname,
343 "SA_WITHOUT_MN_OR_MC") ||
344 !strcmp(line_break_property[prop2].enumname,
345 "SA_WITHOUT_MN_OR_MC")) {
346 target = "SA_WITH_MN_OR_MC";
347 } else {
348 /* ignore Mn and Mc for the rest */
349 if ((!strcmp(line_break_property[prop1].enumname,
350 "TMP_MN") ||
351 !strcmp(line_break_property[prop1].enumname,
352 "TMP_MC"))) {
353 result = prop2;
354 } else {
355 result = prop1;
356 }
357 }
358 } else if (!strcmp(line_break_property[prop1].enumname, "TMP_CN"…
359 !strcmp(line_break_property[prop2].enumname, "TMP_CN"…
360 if (!strcmp(line_break_property[prop1].enumname,
361 "TMP_EXTENDED_PICTOGRAPHIC") ||
362 !strcmp(line_break_property[prop2].enumname,
363 "TMP_EXTENDED_PICTOGRAPHIC")) {
364 target = "BOTH_CN_EXTPICT";
365 } else {
366 /* ignore Cn for all the other properties */
367 if (!strcmp(line_break_property[prop1].enumname,
368 "TMP_CN")) {
369 result = prop2;
370 } else {
371 result = prop1;
372 }
373 }
374 } else if (!strcmp(line_break_property[prop1].enumname,
375 "TMP_EXTENDED_PICTOGRAPHIC") ||
376 !strcmp(line_break_property[prop2].enumname,
377 "TMP_EXTENDED_PICTOGRAPHIC")) {
378 if (!strcmp(line_break_property[prop1].enumname, "TMP_CN…
379 !strcmp(line_break_property[prop2].enumname, "TMP_CN…
380 target = "BOTH_CN_EXTPICT";
381 } else {
382 /* ignore Extended_Pictographic for all the other
383 * properties */
384 if (!strcmp(line_break_property[prop1].enumname,
385 "TMP_EXTENDED_PICTOGRAPHIC")) {
386 result = prop2;
387 } else {
388 result = prop1;
389 }
390 }
391 } else {
392 fprintf(stderr,
393 "handle_conflict: Cannot handle conflict %s <- %…
394 line_break_property[prop1].enumname,
395 line_break_property[prop2].enumname);
396 exit(1);
397 }
398
399 if (target) {
400 for (result = 0; result < LEN(line_break_property); resu…
401 if (!strcmp(line_break_property[result].enumname,
402 target)) {
403 break;
404 }
405 }
406 if (result == LEN(line_break_property)) {
407 fprintf(stderr, "handle_conflict: Internal error…
408 exit(1);
409 }
410 }
411
412 return result;
413 }
414
415 static void
416 post_process(struct properties *prop)
417 {
418 const char *target;
419 uint_least8_t result;
420 size_t i;
421
422 /* post-mapping according to the line breaking algorithm */
423 for (i = 0; i < UINT32_C(0x110000); i++) {
424 /* LB1 */
425 if (!strcmp(line_break_property[prop[i].property].enumna…
426 "TMP_AI") ||
427 !strcmp(line_break_property[prop[i].property].enumna…
428 "TMP_SG") ||
429 !strcmp(line_break_property[prop[i].property].enumna…
430 "TMP_XX")) {
431 /* map AI, SG and XX to AL */
432 target = "AL";
433 } else if (!strcmp(line_break_property[prop[i].property]
434 .enumname,
435 "TMP_SA_WITH_MN_OR_MC")) {
436 /* map SA (with General_Category Mn or Mc) to CM…
437 target = "CM";
438 } else if (!strcmp(line_break_property[prop[i].property]
439 .enumname,
440 "TMP_SA_WITHOUT_MN_OR_MC")) {
441 /* map SA (without General_Category Mn or Mc) to…
442 target = "AL";
443 } else if (!strcmp(line_break_property[prop[i].property]
444 .enumname,
445 "TMP_CJ")) {
446 /* map CJ to NS */
447 target = "NS";
448 } else if (
449 !strcmp(line_break_property[prop[i].property].en…
450 "TMP_CN") ||
451 !strcmp(line_break_property[prop[i].property].en…
452 "TMP_EXTENDED_PICTOGRAPHIC") ||
453 !strcmp(line_break_property[prop[i].property].en…
454 "TMP_MN") ||
455 !strcmp(line_break_property[prop[i].property].en…
456 "TMP_MC") ||
457 !strcmp(line_break_property[prop[i].property].en…
458 "TMP_EAW_H") ||
459 !strcmp(line_break_property[prop[i].property].en…
460 "TMP_EAW_W") ||
461 !strcmp(line_break_property[prop[i].property].en…
462 "TMP_EAW_F")) {
463 /* map all the temporary classes "residue" to AL…
464 target = "AL";
465 } else {
466 target = NULL;
467 }
468
469 if (target) {
470 for (result = 0; result < LEN(line_break_propert…
471 result++) {
472 if (!strcmp(line_break_property[result]
473 .enumname,
474 target)) {
475 break;
476 }
477 }
478 if (result == LEN(line_break_property)) {
479 fprintf(stderr,
480 "handle_conflict: Internal error…
481 exit(1);
482 }
483
484 prop[i].property = result;
485 }
486 }
487 }
488
489 int
490 main(int argc, char *argv[])
491 {
492 (void)argc;
493
494 properties_generate_break_property(
495 line_break_property, LEN(line_break_property), NULL,
496 handle_conflict, post_process, "line_break", argv[0]);
497
498 return 0;
499 }
You are viewing proxied material from suckless.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.