/*
 * util.c - libgrapheme - unicode string library
 * Upstream repository: git clone git://git.suckless.org/libgrapheme
 */
/* See LICENSE file for copyright and license details. */
#include <ctype.h>
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "util.h"

/* inclusive range of codepoints, lower..upper */
struct range {
	uint_least32_t lower;
	uint_least32_t upper;
};

/* state handed to properties_callback() via the opaque payload pointer */
struct properties_payload {
	/* property table that is indexed by codepoint */
	struct properties *prop;
	/* property specifications and their number */
	const struct property_spec *spec;
	uint_least8_t speclen;
	/* stores a value (an index into spec) for a codepoint */
	int (*set_value)(struct properties_payload *, uint_least32_t,
	                 int_least64_t);
	/* resolves a clash between two properties; may be NULL */
	uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t,
	                                 uint_least8_t);
};

/* state handed to break_test_callback() via the opaque payload pointer */
struct break_test_payload {
	/* address of the growing test array and of its length */
	struct break_test **test;
	size_t *testlen;
};

/*
 * Resize the array p to len elements of size bytes each, like
 * realloc(p, len * size). If the multiplication would overflow,
 * nothing is allocated, errno is set to ENOMEM and NULL is returned.
 */
static void *
reallocate_array(void *p, size_t len, size_t size)
{
	if (len > 0 && size > SIZE_MAX / len) {
		errno = ENOMEM;
		return NULL;
	}

	return realloc(p, len * size);
}

/*
 * Parse the first len bytes of str as a hexadecimal number (upper-
 * or lowercase digits, no prefix) and store it in *cp.
 *
 * Returns 0 on success. Returns 1 after printing a message to stderr
 * if the input is empty, longer than 6 digits, contains a
 * non-hexadecimal character or exceeds 0x10FFFF; *cp is left
 * untouched in that case.
 */
int
hextocp(const char *str, size_t len, uint_least32_t *cp)
{
	/* the precision of "%.*s" has to be an int, so clamp it */
	int prec = (len > (size_t)INT_MAX) ? INT_MAX : (int)len;
	uint_least32_t val = 0, digit;
	size_t i;

	if (len == 0) {
		fprintf(stderr, "hextocp: Empty string is not a codepoint.\n");
		return 1;
	}

	/* the maximum valid codepoint is 0x10FFFF */
	if (len > 6) {
		fprintf(stderr, "hextocp: '%.*s' is too long.\n", prec, str);
		return 1;
	}

	for (i = 0; i < len; i++) {
		if (str[i] >= '0' && str[i] <= '9') {
			digit = (uint_least32_t)(str[i] - '0');
		} else if (str[i] >= 'a' && str[i] <= 'f') {
			digit = (uint_least32_t)(str[i] - 'a') + 10;
		} else if (str[i] >= 'A' && str[i] <= 'F') {
			digit = (uint_least32_t)(str[i] - 'A') + 10;
		} else {
			fprintf(stderr, "hextocp: '%.*s' is not hexadecimal.\n",
			        prec, str);
			return 1;
		}

		/* at most 6 digits, so this cannot overflow 32 bits */
		val = (val << 4) | digit;
	}

	if (val > UINT32_C(0x10FFFF)) {
		fprintf(stderr, "hextocp: '%.*s' is too large.\n", prec, str);
		return 1;
	}

	*cp = val;

	return 0;
}

/*
 * Parse a list of hexadecimal codepoints, separated by single
 * spaces, into a newly allocated array stored in *cp with its
 * length in *cplen. An empty string yields *cp == NULL and
 * *cplen == 0.
 *
 * Returns 0 on success and 1 if a list element could not be parsed,
 * in which case nothing stays allocated and *cp is NULL and *cplen
 * is 0. The program is terminated if the allocation fails.
 */
int
parse_cp_list(const char *str, uint_least32_t **cp, size_t *cplen)
{
	const char *tok, *sep;
	size_t count, i;

	*cp = NULL;
	*cplen = 0;

	if (str[0] == '\0') {
		return 0;
	}

	/* the list has one more element than it has spaces */
	for (count = 1, tok = str; (sep = strchr(tok, ' ')) != NULL;
	     tok = sep + 1) {
		count++;
	}

	/* allocate resources */
	if (!(*cp = calloc(count, sizeof(**cp)))) {
		fprintf(stderr, "calloc: %s\n", strerror(errno));
		exit(1);
	}

	/* go through the string again, parsing the numbers */
	for (i = 0, tok = str; tok != NULL; i++) {
		sep = strchr(tok, ' ');
		if (hextocp(tok, sep ? (size_t)(sep - tok) : strlen(tok),
		            &((*cp)[i]))) {
			/* do not leak the partially filled array */
			free(*cp);
			*cp = NULL;
			return 1;
		}
		tok = sep ? sep + 1 : NULL;
	}
	*cplen = count;

	return 0;
}

127 static int | |
128 range_parse(const char *str, struct range *range) | |
129 { | |
130 char *p; | |
131 | |
132 if ((p = strstr(str, "..")) == NULL) { | |
133 /* input has the form "XXXXXX" */ | |
134 if (hextocp(str, strlen(str), &range->lower)) { | |
135 return 1; | |
136 } | |
137 range->upper = range->lower; | |
138 } else { | |
139 /* input has the form "XXXXXX..XXXXXX" */ | |
140 if (hextocp(str, (size_t)(p - str), &range->lower) || | |
141 hextocp(p + 2, strlen(p + 2), &range->upper)) { | |
142 return 1; | |
143 } | |
144 } | |
145 | |
146 return 0; | |
147 } | |
148 | |
/*
 * Read the next line from fp, including its trailing newline (if
 * any), into the NUL-terminated buffer *buf, growing it (and
 * *bufsize) in steps of 512 bytes as needed. The number of bytes
 * read, not counting the NUL, is stored in *len.
 *
 * Returns true when nothing could be read because of end-of-file or
 * a read error, i.e. when there are no more lines, and false
 * otherwise. The program is terminated if the buffer cannot be
 * grown.
 */
static bool
get_line(char **buf, size_t *bufsize, FILE *fp, size_t *len)
{
	int c;

	for (*len = 0;; (*len)++) {
		if (*len > 0 && *buf != NULL && (*buf)[*len - 1] == '\n') {
			/*
			 * if the previously read character was a newline,
			 * we fake an end-of-file so we NUL-terminate and
			 * are done.
			 */
			c = EOF;
		} else {
			c = fgetc(fp);
		}

		if (*len >= *bufsize) {
			/* the buffer needs to be expanded */
			*bufsize += 512;
			if ((*buf = realloc(*buf, *bufsize)) == NULL) {
				fprintf(stderr, "get_line: Out of memory.\n");
				exit(1);
			}
		}

		if (c == EOF) {
			(*buf)[*len] = '\0';
			break;
		}
		(*buf)[*len] = (char)c;
	}

	return *len == 0 && (feof(fp) || ferror(fp));
}

/*
 * Read the file fname line by line, split each line that is neither
 * empty nor starts with '#' into ';'-separated fields (with
 * surrounding spaces removed) and an optional trailing comment
 * introduced by '#', and pass them to callback as
 *
 *     callback(fname, field, nfields, comment, payload)
 *
 * where comment is NULL if the line has none. The field and comment
 * strings point into an internal line buffer that is reused for the
 * next line.
 *
 * The program is terminated if the file cannot be opened or closed,
 * memory runs out or the callback returns a non-zero value.
 */
void
parse_file_with_callback(const char *fname,
                         int (*callback)(const char *, char **, size_t,
                                         char *, void *),
                         void *payload)
{
	FILE *fp;
	char *line = NULL, **field = NULL, *comment;
	size_t linebufsize = 0, fieldbufsize = 0, nfields, len, i, start,
	       end;

	/* open file */
	if (!(fp = fopen(fname, "r"))) {
		fprintf(stderr, "parse_file_with_callback: fopen '%s': %s.\n",
		        fname, strerror(errno));
		exit(1);
	}

	while (!get_line(&line, &linebufsize, fp, &len)) {
		/* remove trailing newline */
		if (len > 0 && line[len - 1] == '\n') {
			line[--len] = '\0';
		}

		/* skip empty lines and comment lines */
		if (len == 0 || line[0] == '#') {
			continue;
		}

		/* tokenize line into fields */
		for (i = 0, nfields = 0, comment = NULL; i < len; i++) {
			/* skip leading whitespace */
			while (line[i] == ' ') {
				i++;
			}

			/* check if we crashed into the comment */
			if (line[i] == '#') {
				comment = &line[i + 1];
				break;
			}

			/* extend field buffer, if necessary */
			if (++nfields > fieldbufsize) {
				if ((field = realloc(field,
				                     nfields * sizeof(*field))) ==
				    NULL) {
					fprintf(stderr,
					        "parse_file_with_"
					        "callback: realloc: "
					        "%s.\n",
					        strerror(errno));
					exit(1);
				}
				fieldbufsize = nfields;
			}

			/* set current position as field start */
			field[nfields - 1] = &line[i];
			start = i;

			/* continue until we reach ';' or '#' or the end */
			while (line[i] != ';' && line[i] != '#' &&
			       line[i] != '\0') {
				i++;
			}

			/*
			 * remember the comment before the delimiter is
			 * possibly overwritten by the field terminator
			 */
			if (line[i] == '#') {
				comment = &line[i + 1];
			}

			/*
			 * drop trailing whitespace and terminate the
			 * field, but never step back beyond its start
			 */
			for (end = i; end > start && line[end - 1] == ' ';
			     end--) {
				;
			}
			line[end] = '\0';

			/* if comment is set, we are done */
			if (comment != NULL) {
				break;
			}
		}

		/* skip leading whitespace in comment */
		while (comment != NULL && comment[0] == ' ') {
			comment++;
		}

		/* call callback function */
		if (callback(fname, field, nfields, comment, payload)) {
			fprintf(stderr, "parse_file_with_callback: "
			                "Malformed input.\n");
			exit(1);
		}
	}

	/* close file */
	if (fclose(fp)) {
		fprintf(stderr, "parse_file_with_callback: fclose '%s': %s.\n",
		        fname, strerror(errno));
		exit(1);
	}

	/* cleanup */
	free(line);
	free(field);
}

297 static int | |
298 properties_callback(const char *file, char **field, size_t nfields, | |
299 char *comment, void *payload) | |
300 { | |
301 /* prop always has the length 0x110000 */ | |
302 struct properties_payload *p = (struct properties_payload *)payl… | |
303 struct range r; | |
304 uint_least8_t i; | |
305 uint_least32_t cp; | |
306 | |
307 (void)comment; | |
308 | |
309 if (nfields < 2) { | |
310 return 1; | |
311 } | |
312 | |
313 for (i = 0; i < p->speclen; i++) { | |
314 /* identify fitting file and identifier */ | |
315 if (p->spec[i].file && !strcmp(p->spec[i].file, file) && | |
316 (!strcmp(p->spec[i].ucdname, field[1]) || | |
317 (comment != NULL && | |
318 !strncmp(p->spec[i].ucdname, comment, | |
319 strlen(p->spec[i].ucdname)) && | |
320 comment[strlen(p->spec[i].ucdname)] == ' ')) && | |
321 (p->spec[i].ucdsubname == NULL || | |
322 (nfields >= 3 && | |
323 !strcmp(p->spec[i].ucdsubname, field[2])))) { | |
324 /* parse range in first field */ | |
325 if (range_parse(field[0], &r)) { | |
326 return 1; | |
327 } | |
328 | |
329 /* apply to all codepoints in the range */ | |
330 for (cp = r.lower; cp <= r.upper; cp++) { | |
331 if (p->set_value(payload, cp, i)) { | |
332 exit(1); | |
333 } | |
334 } | |
335 break; | |
336 } | |
337 } | |
338 | |
339 return 0; | |
340 } | |
341 | |
342 void | |
343 properties_compress(const struct properties *prop, | |
344 struct properties_compressed *comp) | |
345 { | |
346 uint_least32_t cp, i; | |
347 | |
348 /* initialization */ | |
349 if (!(comp->offset = malloc((size_t)UINT32_C(0x110000) * | |
350 sizeof(*(comp->offset))))) { | |
351 fprintf(stderr, "malloc: %s\n", strerror(errno)); | |
352 exit(1); | |
353 } | |
354 comp->data = NULL; | |
355 comp->datalen = 0; | |
356 | |
357 for (cp = 0; cp < UINT32_C(0x110000); cp++) { | |
358 for (i = 0; i < comp->datalen; i++) { | |
359 if (!memcmp(&(prop[cp]), &(comp->data[i]), | |
360 sizeof(*prop))) { | |
361 /* found a match! */ | |
362 comp->offset[cp] = i; | |
363 break; | |
364 } | |
365 } | |
366 if (i == comp->datalen) { | |
367 /* | |
368 * found no matching properties-struct, so | |
369 * add current properties to data and add the | |
370 * offset in the offset-table | |
371 */ | |
372 if (!(comp->data = reallocate_array( | |
373 comp->data, ++(comp->datalen), | |
374 sizeof(*(comp->data))))) { | |
375 fprintf(stderr, "reallocate_array: %s\n", | |
376 strerror(errno)); | |
377 exit(1); | |
378 } | |
379 memcpy(&(comp->data[comp->datalen - 1]), &(prop[… | |
380 sizeof(*prop)); | |
381 comp->offset[cp] = comp->datalen - 1; | |
382 } | |
383 } | |
384 } | |
385 | |
386 double | |
387 properties_get_major_minor(const struct properties_compressed *comp, | |
388 struct properties_major_minor *mm) | |
389 { | |
390 size_t i, j, compression_count = 0; | |
391 | |
392 /* | |
393 * we currently have an array comp->offset which maps the | |
394 * codepoints 0..0x110000 to offsets into comp->data. | |
395 * To improve cache-locality instead and allow a bit of | |
396 * compressing, instead of directly mapping a codepoint | |
397 * 0xAAAABB with comp->offset, we generate two arrays major | |
398 * and minor such that | |
399 * comp->offset(0xAAAABB) == minor[major[0xAAAA] + 0xBB] | |
400 * This yields a major-array of length 2^16 and a minor array | |
401 * of variable length depending on how many common subsequences | |
402 * can be filtered out. | |
403 */ | |
404 | |
405 /* initialize */ | |
406 if (!(mm->major = malloc((size_t)0x1100 * sizeof(*(mm->major))))… | |
407 fprintf(stderr, "malloc: %s\n", strerror(errno)); | |
408 exit(1); | |
409 } | |
410 mm->minor = NULL; | |
411 mm->minorlen = 0; | |
412 | |
413 for (i = 0; i < (size_t)0x1100; i++) { | |
414 /* | |
415 * we now look at the cp-range (i << 8)..(i << 8 + 0xFF) | |
416 * and check if its corresponding offset-data already | |
417 * exists in minor (because then we just point there | |
418 * and need less storage) | |
419 */ | |
420 for (j = 0; j + 0xFF < mm->minorlen; j++) { | |
421 if (!memcmp(&(comp->offset[i << 8]), &(mm->minor… | |
422 sizeof(*(comp->offset)) * 0x100)) { | |
423 break; | |
424 } | |
425 } | |
426 if (j + 0xFF < mm->minorlen) { | |
427 /* found an index */ | |
428 compression_count++; | |
429 mm->major[i] = j; | |
430 } else { | |
431 /* | |
432 * add "new" sequence to minor and point to it | |
433 * in major | |
434 */ | |
435 mm->minorlen += 0x100; | |
436 if (!(mm->minor = | |
437 reallocate_array(mm->minor, mm->mi… | |
438 sizeof(*(mm->mino… | |
439 fprintf(stderr, "reallocate_array: %s\n", | |
440 strerror(errno)); | |
441 exit(1); | |
442 } | |
443 memcpy(&(mm->minor[mm->minorlen - 0x100]), | |
444 &(comp->offset[i << 8]), | |
445 sizeof(*(mm->minor)) * 0x100); | |
446 mm->major[i] = mm->minorlen - 0x100; | |
447 } | |
448 } | |
449 | |
450 /* return compression ratio */ | |
451 return (double)compression_count / 0x1100 * 100; | |
452 } | |
453 | |
/*
 * Print the datalen values in data to stdout as the definition of a
 * static const C array called name, eight values per line. The
 * element type is the first of uint_least{8,16,32,64}_t whose
 * maximum is not exceeded by the largest value.
 */
void
properties_print_lookup_table(const char *name, const size_t *data,
                              size_t datalen)
{
	const char *type;
	size_t i, maxval = 0;

	for (i = 0; i < datalen; i++) {
		if (data[i] > maxval) {
			maxval = data[i];
		}
	}

	if (maxval <= UINT_LEAST8_MAX) {
		type = "uint_least8_t";
	} else if (maxval <= UINT_LEAST16_MAX) {
		type = "uint_least16_t";
	} else if (maxval <= UINT_LEAST32_MAX) {
		type = "uint_least32_t";
	} else {
		type = "uint_least64_t";
	}

	printf("static const %s %s[] = {\n\t", type, name);
	for (i = 0; i < datalen; i++) {
		printf("%zu", data[i]);
		if (i + 1 == datalen) {
			/* last value */
			printf("\n");
		} else if ((i + 1) % 8 != 0) {
			printf(", ");
		} else {
			/* eight values printed, start a new line */
			printf(",\n\t");
		}
	}
	printf("};\n");
}

/*
 * Print the values get_value(payload, offset[i]) for all
 * i < offsetlen to stdout as the definition of a static const C
 * array called name, eight values per line. The element type is the
 * narrowest least-width integer type that holds all values; a
 * signed one is chosen only if a value is negative.
 */
void
properties_print_derived_lookup_table(
	char *name, size_t *offset, size_t offsetlen,
	int_least64_t (*get_value)(const struct properties *, size_t),
	const void *payload)
{
	const char *type;
	size_t i;
	int_least64_t val, minval = INT_LEAST64_MAX,
	                   maxval = INT_LEAST64_MIN;

	/*
	 * minimum and maximum are tracked independently: a value that
	 * raises the maximum may lower the minimum as well (this is
	 * always the case for the very first value)
	 */
	for (i = 0; i < offsetlen; i++) {
		val = get_value(payload, offset[i]);
		if (val > maxval) {
			maxval = val;
		}
		if (val < minval) {
			minval = val;
		}
	}

	if (minval < 0) {
		/* we need a signed type */
		type = (minval >= INT_LEAST8_MIN && maxval <= INT_LEAST8_MAX) ?
		               "int_least8_t" :
		       (minval >= INT_LEAST16_MIN &&
		        maxval <= INT_LEAST16_MAX) ?
		               "int_least16_t" :
		       (minval >= INT_LEAST32_MIN &&
		        maxval <= INT_LEAST32_MAX) ?
		               "int_least32_t" :
		               "int_least64_t";
	} else {
		/* we are fine with an unsigned type */
		type = (maxval <= UINT_LEAST8_MAX)  ? "uint_least8_t" :
		       (maxval <= UINT_LEAST16_MAX) ? "uint_least16_t" :
		       (maxval <= UINT_LEAST32_MAX) ? "uint_least32_t" :
		                                      "uint_least64_t";
	}

	printf("static const %s %s[] = {\n\t", type, name);
	for (i = 0; i < offsetlen; i++) {
		printf("%" PRIiLEAST64, get_value(payload, offset[i]));
		if (i + 1 == offsetlen) {
			printf("\n");
		} else if ((i + 1) % 8 != 0) {
			printf(", ");
		} else {
			printf(",\n\t");
		}
	}
	printf("};\n");
}

538 static void | |
539 properties_print_enum(const struct property_spec *spec, size_t speclen, | |
540 const char *enumname, const char *enumprefix) | |
541 { | |
542 size_t i; | |
543 | |
544 printf("enum %s {\n", enumname); | |
545 for (i = 0; i < speclen; i++) { | |
546 printf("\t%s_%s,\n", enumprefix, spec[i].enumname); | |
547 } | |
548 printf("\tNUM_%sS,\n};\n\n", enumprefix); | |
549 } | |
550 | |
551 static int | |
552 set_value_bp(struct properties_payload *payload, uint_least32_t cp, | |
553 int_least64_t value) | |
554 { | |
555 if (payload->prop[cp].property != payload->speclen) { | |
556 if (payload->handle_conflict == NULL) { | |
557 fprintf(stderr, | |
558 "set_value_bp: " | |
559 "Unhandled character break property " | |
560 "overwrite for 0x%06X (%s <- %s).\n", | |
561 cp, | |
562 payload->spec[payload->prop[cp].property] | |
563 .enumname, | |
564 payload->spec[value].enumname); | |
565 return 1; | |
566 } else { | |
567 value = payload->handle_conflict( | |
568 cp, (uint_least8_t)payload->prop[cp].pro… | |
569 (uint_least8_t)value); | |
570 } | |
571 } | |
572 payload->prop[cp].property = value; | |
573 | |
574 return 0; | |
575 } | |
576 | |
577 static int_least64_t | |
578 get_value_bp(const struct properties *prop, size_t offset) | |
579 { | |
580 return prop[offset].property; | |
581 } | |
582 | |
583 void | |
584 properties_generate_break_property( | |
585 const struct property_spec *spec, uint_least8_t speclen, | |
586 uint_least8_t (*fill_missing)(uint_least32_t), | |
587 uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t, | |
588 uint_least8_t), | |
589 void (*post_process)(struct properties *), const char *prefix, | |
590 const char *argv0) | |
591 { | |
592 struct properties_compressed comp; | |
593 struct properties_major_minor mm; | |
594 struct properties_payload payload; | |
595 struct properties *prop; | |
596 size_t i, j, prefixlen = strlen(prefix); | |
597 char buf1[64], prefix_uc[64], buf2[64], buf3[64], buf4[64]; | |
598 | |
599 /* | |
600 * allocate property buffer for all 0x110000 codepoints and | |
601 * initialize its entries to the known invalid value "speclen" | |
602 */ | |
603 if (!(prop = calloc(UINT32_C(0x110000), sizeof(*prop)))) { | |
604 fprintf(stderr, "calloc: %s\n", strerror(errno)); | |
605 exit(1); | |
606 } | |
607 for (i = 0; i < UINT32_C(0x110000); i++) { | |
608 prop[i].property = speclen; | |
609 } | |
610 | |
611 /* generate data */ | |
612 payload.prop = prop; | |
613 payload.spec = spec; | |
614 payload.speclen = speclen; | |
615 payload.set_value = set_value_bp; | |
616 payload.handle_conflict = handle_conflict; | |
617 | |
618 /* parse each file exactly once and ignore NULL-fields */ | |
619 for (i = 0; i < speclen; i++) { | |
620 for (j = 0; j < i; j++) { | |
621 if (spec[i].file && spec[j].file && | |
622 !strcmp(spec[i].file, spec[j].file)) { | |
623 /* file has already been parsed */ | |
624 break; | |
625 } | |
626 } | |
627 if (i == j && spec[i].file) { | |
628 /* file has not been processed yet */ | |
629 parse_file_with_callback(spec[i].file, | |
630 properties_callback, &p… | |
631 } | |
632 } | |
633 | |
634 /* fill in the missing properties that weren't explicitly given … | |
635 for (i = 0; i < UINT32_C(0x110000); i++) { | |
636 if (payload.prop[i].property == speclen) { | |
637 if (fill_missing != NULL) { | |
638 payload.prop[i].property = | |
639 fill_missing((uint_least32_t)i); | |
640 } else { | |
641 payload.prop[i].property = 0; | |
642 } | |
643 } | |
644 } | |
645 | |
646 /* post-processing */ | |
647 if (post_process != NULL) { | |
648 post_process(payload.prop); | |
649 } | |
650 | |
651 /* compress data */ | |
652 printf("/* Automatically generated by %s */\n#include <stdint.h>… | |
653 argv0); | |
654 properties_compress(prop, &comp); | |
655 | |
656 fprintf(stderr, "%s: %s-LUT compression-ratio: %.2f%%\n", argv0,… | |
657 properties_get_major_minor(&comp, &mm)); | |
658 | |
659 /* prepare names */ | |
660 if ((size_t)snprintf(buf1, LEN(buf1), "%s_property", prefix) >= | |
661 LEN(buf1)) { | |
662 fprintf(stderr, "snprintf: String truncated.\n"); | |
663 exit(1); | |
664 } | |
665 if (LEN(prefix_uc) + 1 < prefixlen) { | |
666 fprintf(stderr, "snprintf: Buffer too small.\n"); | |
667 exit(1); | |
668 } | |
669 for (i = 0; i < prefixlen; i++) { | |
670 prefix_uc[i] = (char)toupper(prefix[i]); | |
671 } | |
672 prefix_uc[prefixlen] = '\0'; | |
673 if ((size_t)snprintf(buf2, LEN(buf2), "%s_PROP", prefix_uc) >= | |
674 LEN(buf2) || | |
675 (size_t)snprintf(buf3, LEN(buf3), "%s_major", prefix) >= | |
676 LEN(buf3) || | |
677 (size_t)snprintf(buf4, LEN(buf4), "%s_minor", prefix) >= | |
678 LEN(buf4)) { | |
679 fprintf(stderr, "snprintf: String truncated.\n"); | |
680 exit(1); | |
681 } | |
682 | |
683 /* print data */ | |
684 properties_print_enum(spec, speclen, buf1, buf2); | |
685 properties_print_lookup_table(buf3, mm.major, 0x1100); | |
686 printf("\n"); | |
687 properties_print_derived_lookup_table(buf4, mm.minor, mm.minorle… | |
688 get_value_bp, comp.data); | |
689 | |
690 /* free data */ | |
691 free(prop); | |
692 free(comp.data); | |
693 free(comp.offset); | |
694 free(mm.major); | |
695 free(mm.minor); | |
696 } | |
697 | |
698 static int | |
699 break_test_callback(const char *fname, char **field, size_t nfields, | |
700 char *comment, void *payload) | |
701 { | |
702 struct break_test *t, | |
703 **test = ((struct break_test_payload *)payload)->test; | |
704 size_t i, *testlen = ((struct break_test_payload *)payload)->tes… | |
705 commentlen; | |
706 char *token; | |
707 | |
708 (void)fname; | |
709 | |
710 if (nfields < 1) { | |
711 return 1; | |
712 } | |
713 | |
714 /* append new testcase and initialize with zeroes */ | |
715 if ((*test = realloc(*test, ++(*testlen) * sizeof(**test))) == N… | |
716 fprintf(stderr, "break_test_callback: realloc: %s.\n", | |
717 strerror(errno)); | |
718 return 1; | |
719 } | |
720 t = &(*test)[*testlen - 1]; | |
721 memset(t, 0, sizeof(*t)); | |
722 | |
723 /* parse testcase "<÷|×> <cp> <÷|×> ... <cp> <÷|×>" */ | |
724 for (token = strtok(field[0], " "), i = 0; token != NULL; | |
725 i++, token = strtok(NULL, " ")) { | |
726 if (i % 2 == 0) { | |
727 /* delimiter or start of sequence */ | |
728 if (i == 0 || | |
729 !strncmp(token, "\xC3\xB7", 2)) { /* UTF-8 */ | |
730 /* | |
731 * '÷' indicates a breakpoint, | |
732 * the current length is done; allocate | |
733 * a new length field and set it to 0 | |
734 */ | |
735 if ((t->len = realloc( | |
736 t->len, | |
737 ++t->lenlen * sizeof(*t->le… | |
738 NULL) { | |
739 fprintf(stderr, | |
740 "break_test_" | |
741 "callback: realloc: %s.\… | |
742 strerror(errno)); | |
743 return 1; | |
744 } | |
745 t->len[t->lenlen - 1] = 0; | |
746 } else if (!strncmp(token, "\xC3\x97", 2)) { /* … | |
747 /* '×' indicates a non-breakpoint, do n… | |
748 } else { | |
749 fprintf(stderr, | |
750 "break_test_callback: " | |
751 "Malformed delimiter '%s'.\n", | |
752 token); | |
753 return 1; | |
754 } | |
755 } else { | |
756 /* add codepoint to cp-array */ | |
757 if ((t->cp = realloc(t->cp, | |
758 ++t->cplen * sizeof(*t->cp)… | |
759 NULL) { | |
760 fprintf(stderr, | |
761 "break_test_callback: " | |
762 "realloc: %s.\n", | |
763 strerror(errno)); | |
764 return 1; | |
765 } | |
766 if (hextocp(token, strlen(token), | |
767 &t->cp[t->cplen - 1])) { | |
768 return 1; | |
769 } | |
770 if (t->lenlen > 0) { | |
771 t->len[t->lenlen - 1]++; | |
772 } | |
773 } | |
774 } | |
775 if (t->lenlen > 0 && t->len[t->lenlen - 1] == 0) { | |
776 /* | |
777 * we allocated one more length than we needed because | |
778 * the breakpoint was at the end | |
779 */ | |
780 t->lenlen--; | |
781 } | |
782 | |
783 /* store comment */ | |
784 if (comment != NULL) { | |
785 commentlen = strlen(comment) + 1; | |
786 if (((*test)[*testlen - 1].descr = malloc(commentlen)) == | |
787 NULL) { | |
788 fprintf(stderr, "break_test_callback: malloc: %s… | |
789 strerror(errno)); | |
790 return 1; | |
791 } | |
792 memcpy((*test)[*testlen - 1].descr, comment, commentlen); | |
793 } | |
794 | |
795 return 0; | |
796 } | |
797 | |
798 void | |
799 break_test_list_parse(char *fname, struct break_test **test, size_t *tes… | |
800 { | |
801 struct break_test_payload pl = { | |
802 .test = test, | |
803 .testlen = testlen, | |
804 }; | |
805 *test = NULL; | |
806 *testlen = 0; | |
807 | |
808 parse_file_with_callback(fname, break_test_callback, &pl); | |
809 } | |
810 | |
811 void | |
812 break_test_list_print(const struct break_test *test, size_t testlen, | |
813 const char *identifier, const char *progname) | |
814 { | |
815 size_t i, j; | |
816 | |
817 printf("/* Automatically generated by %s */\n" | |
818 "#include <stdint.h>\n#include <stddef.h>\n\n" | |
819 "#include \"../gen/types.h\"\n\n", | |
820 progname); | |
821 | |
822 printf("static const struct break_test %s[] = {\n", identifier); | |
823 for (i = 0; i < testlen; i++) { | |
824 printf("\t{\n"); | |
825 | |
826 printf("\t\t.cp = (uint_least32_t[]){"); | |
827 for (j = 0; j < test[i].cplen; j++) { | |
828 printf(" UINT32_C(0x%06X)", test[i].cp[j]); | |
829 if (j + 1 < test[i].cplen) { | |
830 putchar(','); | |
831 } | |
832 } | |
833 printf(" },\n"); | |
834 printf("\t\t.cplen = %zu,\n", test[i].cplen); | |
835 | |
836 printf("\t\t.len = (size_t[]){"); | |
837 for (j = 0; j < test[i].lenlen; j++) { | |
838 printf(" %zu", test[i].len[j]); | |
839 if (j + 1 < test[i].lenlen) { | |
840 putchar(','); | |
841 } | |
842 } | |
843 printf(" },\n"); | |
844 printf("\t\t.lenlen = %zu,\n", test[i].lenlen); | |
845 | |
846 printf("\t\t.descr = \"%s\",\n", test[i].descr); | |
847 | |
848 printf("\t},\n"); | |
849 } | |
850 printf("};\n"); | |
851 } | |
852 | |
853 void | |
854 break_test_list_free(struct break_test *test, size_t testlen) | |
855 { | |
856 size_t i; | |
857 | |
858 for (i = 0; i < testlen; i++) { | |
859 free(test[i].cp); | |
860 free(test[i].len); | |
861 free(test[i].descr); | |
862 } | |
863 | |
864 free(test); | |
865 } |