/*
 * case.c - libgrapheme - unicode string library
 * git clone git://git.suckless.org/libgrapheme
 * Repository navigation: Log, Files, Refs, README, LICENSE
 * case.c (16477B)
 */
1 /* See LICENSE file for copyright and license details. */ | |
2 #include <stdbool.h> | |
3 #include <stdint.h> | |
4 #include <stdio.h> | |
5 #include <string.h> | |
6 | |
7 #include "../grapheme.h" | |
8 #include "util.h" | |
9 | |
/*
 * One test case for the grapheme_is_{lower,upper,title}case_utf8()
 * predicates. The tables in this file initialise the nested structs
 * positionally, so the member order must not change.
 */
struct unit_test_is_case_utf8 {
	const char *description; /* label printed when the case fails */

	struct {
		const char *src; /* input string handed to the predicate */
		size_t srclen;   /* length argument; the NUL-terminated
		                    cases in the tables pass SIZE_MAX */
	} input;

	struct {
		bool ret;       /* expected return value */
		size_t caselen; /* expected value stored through the
		                   length out-parameter */
	} output;
};
23 | |
/*
 * One test case for the grapheme_to_{lower,upper,title}case_utf8()
 * converters. The tables in this file initialise the nested structs
 * positionally, so the member order must not change.
 */
struct unit_test_to_case_utf8 {
	const char *description; /* label printed when the case fails */

	struct {
		const char *src; /* input string handed to the converter */
		size_t srclen;   /* length argument; the NUL-terminated
		                    cases in the tables pass SIZE_MAX */
		size_t destlen;  /* size of the destination window offered
		                    to the converter */
	} input;

	struct {
		const char *dest; /* expected destination contents */
		size_t ret;       /* expected return value */
	} output;
};
38 | |
39 static const struct unit_test_is_case_utf8 is_lowercase_utf8[] = { | |
40 { | |
41 .description = "empty input", | |
42 .input = { "", 0 }, | |
43 .output = { true, 0 }, | |
44 }, | |
45 { | |
46 .description = "one character, violation", | |
47 .input = { "A", 1 }, | |
48 .output = { false, 0 }, | |
49 }, | |
50 { | |
51 .description = "one character, confirmation", | |
52 .input = { "\xC3\x9F", 2 }, | |
53 .output = { true, 2 }, | |
54 }, | |
55 { | |
56 .description = "one character, violation, NUL-terminated… | |
57 .input = { "A", SIZE_MAX }, | |
58 .output = { false, 0 }, | |
59 }, | |
60 { | |
61 .description = "one character, confirmation, NUL-termina… | |
62 .input = { "\xC3\x9F", SIZE_MAX }, | |
63 .output = { true, 2 }, | |
64 }, | |
65 { | |
66 .description = "one word, violation", | |
67 .input = { "Hello", 5 }, | |
68 .output = { false, 0 }, | |
69 }, | |
70 { | |
71 .description = "one word, partial confirmation", | |
72 .input = { "gru" | |
73 "\xC3\x9F" | |
74 "fOrmel", | |
75 11 }, | |
76 .output = { false, 6 }, | |
77 }, | |
78 { | |
79 .description = "one word, full confirmation", | |
80 .input = { "gru" | |
81 "\xC3\x9F" | |
82 "formel", | |
83 11 }, | |
84 .output = { true, 11 }, | |
85 }, | |
86 { | |
87 .description = "one word, violation, NUL-terminated", | |
88 .input = { "Hello", SIZE_MAX }, | |
89 .output = { false, 0 }, | |
90 }, | |
91 { | |
92 .description = "one word, partial confirmation, NUL-term… | |
93 .input = { "gru" | |
94 "\xC3\x9F" | |
95 "fOrmel", | |
96 SIZE_MAX }, | |
97 .output = { false, 6 }, | |
98 }, | |
99 { | |
100 .description = "one word, full confirmation, NUL-termina… | |
101 .input = { "gru" | |
102 "\xC3\x9F" | |
103 "formel", | |
104 SIZE_MAX }, | |
105 .output = { true, 11 }, | |
106 }, | |
107 }; | |
108 | |
109 static const struct unit_test_is_case_utf8 is_uppercase_utf8[] = { | |
110 { | |
111 .description = "empty input", | |
112 .input = { "", 0 }, | |
113 .output = { true, 0 }, | |
114 }, | |
115 { | |
116 .description = "one character, violation", | |
117 .input = { "\xC3\x9F", 2 }, | |
118 .output = { false, 0 }, | |
119 }, | |
120 { | |
121 .description = "one character, confirmation", | |
122 .input = { "A", 1 }, | |
123 .output = { true, 1 }, | |
124 }, | |
125 { | |
126 .description = "one character, violation, NUL-terminated… | |
127 .input = { "\xC3\x9F", SIZE_MAX }, | |
128 .output = { false, 0 }, | |
129 }, | |
130 { | |
131 .description = "one character, confirmation, NUL-termina… | |
132 .input = { "A", SIZE_MAX }, | |
133 .output = { true, 1 }, | |
134 }, | |
135 { | |
136 .description = "one word, violation", | |
137 .input = { "hello", 5 }, | |
138 .output = { false, 0 }, | |
139 }, | |
140 { | |
141 .description = "one word, partial confirmation", | |
142 .input = { "GRU" | |
143 "\xC3\x9F" | |
144 "formel", | |
145 11 }, | |
146 .output = { false, 3 }, | |
147 }, | |
148 { | |
149 .description = "one word, full confirmation", | |
150 .input = { "HELLO", 5 }, | |
151 .output = { true, 5 }, | |
152 }, | |
153 { | |
154 .description = "one word, violation, NUL-terminated", | |
155 .input = { "hello", SIZE_MAX }, | |
156 .output = { false, 0 }, | |
157 }, | |
158 { | |
159 .description = "one word, partial confirmation, NUL-term… | |
160 .input = { "GRU" | |
161 "\xC3\x9F" | |
162 "formel", | |
163 SIZE_MAX }, | |
164 .output = { false, 3 }, | |
165 }, | |
166 { | |
167 .description = "one word, full confirmation, NUL-termina… | |
168 .input = { "HELLO", SIZE_MAX }, | |
169 .output = { true, 5 }, | |
170 }, | |
171 }; | |
172 | |
173 static const struct unit_test_is_case_utf8 is_titlecase_utf8[] = { | |
174 { | |
175 .description = "empty input", | |
176 .input = { "", 0 }, | |
177 .output = { true, 0 }, | |
178 }, | |
179 { | |
180 .description = "one character, violation", | |
181 .input = { "\xC3\x9F", 2 }, | |
182 .output = { false, 0 }, | |
183 }, | |
184 { | |
185 .description = "one character, confirmation", | |
186 .input = { "A", 1 }, | |
187 .output = { true, 1 }, | |
188 }, | |
189 { | |
190 .description = "one character, violation, NUL-terminated… | |
191 .input = { "\xC3\x9F", SIZE_MAX }, | |
192 .output = { false, 0 }, | |
193 }, | |
194 { | |
195 .description = "one character, confirmation, NUL-termina… | |
196 .input = { "A", SIZE_MAX }, | |
197 .output = { true, 1 }, | |
198 }, | |
199 { | |
200 .description = "one word, violation", | |
201 .input = { "hello", 5 }, | |
202 .output = { false, 0 }, | |
203 }, | |
204 { | |
205 .description = "one word, partial confirmation", | |
206 .input = { "Gru" | |
207 "\xC3\x9F" | |
208 "fOrmel", | |
209 11 }, | |
210 .output = { false, 6 }, | |
211 }, | |
212 { | |
213 .description = "one word, full confirmation", | |
214 .input = { "Gru" | |
215 "\xC3\x9F" | |
216 "formel", | |
217 11 }, | |
218 .output = { true, 11 }, | |
219 }, | |
220 { | |
221 .description = "one word, violation, NUL-terminated", | |
222 .input = { "hello", SIZE_MAX }, | |
223 .output = { false, 0 }, | |
224 }, | |
225 { | |
226 .description = "one word, partial confirmation, NUL-term… | |
227 .input = { "Gru" | |
228 "\xC3\x9F" | |
229 "fOrmel", | |
230 SIZE_MAX }, | |
231 .output = { false, 6 }, | |
232 }, | |
233 { | |
234 .description = "one word, full confirmation, NUL-termina… | |
235 .input = { "Gru" | |
236 "\xC3\x9F" | |
237 "formel", | |
238 SIZE_MAX }, | |
239 .output = { true, 11 }, | |
240 }, | |
241 { | |
242 .description = "multiple words, partial confirmation", | |
243 .input = { "Hello Gru" | |
244 "\xC3\x9F" | |
245 "fOrmel!", | |
246 18 }, | |
247 .output = { false, 12 }, | |
248 }, | |
249 { | |
250 .description = "multiple words, full confirmation", | |
251 .input = { "Hello Gru" | |
252 "\xC3\x9F" | |
253 "formel!", | |
254 18 }, | |
255 .output = { true, 18 }, | |
256 }, | |
257 { | |
258 .description = | |
259 "multiple words, partial confirmation, NUL-termi… | |
260 .input = { "Hello Gru" | |
261 "\xC3\x9F" | |
262 "fOrmel!", | |
263 SIZE_MAX }, | |
264 .output = { false, 12 }, | |
265 }, | |
266 { | |
267 .description = | |
268 "multiple words, full confirmation, NUL-terminat… | |
269 .input = { "Hello Gru" | |
270 "\xC3\x9F" | |
271 "formel!", | |
272 SIZE_MAX }, | |
273 .output = { true, 18 }, | |
274 }, | |
275 }; | |
276 | |
277 static const struct unit_test_to_case_utf8 to_lowercase_utf8[] = { | |
278 { | |
279 .description = "empty input", | |
280 .input = { "", 0, 10 }, | |
281 .output = { "", 0 }, | |
282 }, | |
283 { | |
284 .description = "empty output", | |
285 .input = { "hello", 5, 0 }, | |
286 .output = { "", 5 }, | |
287 }, | |
288 { | |
289 .description = "one character, conversion", | |
290 .input = { "A", 1, 10 }, | |
291 .output = { "a", 1 }, | |
292 }, | |
293 { | |
294 .description = "one character, no conversion", | |
295 .input = { "\xC3\x9F", 2, 10 }, | |
296 .output = { "\xC3\x9F", 2 }, | |
297 }, | |
298 { | |
299 .description = "one character, conversion, truncation", | |
300 .input = { "A", 1, 0 }, | |
301 .output = { "", 1 }, | |
302 }, | |
303 { | |
304 .description = "one character, conversion, NUL-terminate… | |
305 .input = { "A", SIZE_MAX, 10 }, | |
306 .output = { "a", 1 }, | |
307 }, | |
308 { | |
309 .description = "one character, no conversion, NUL-termin… | |
310 .input = { "\xC3\x9F", SIZE_MAX, 10 }, | |
311 .output = { "\xC3\x9F", 2 }, | |
312 }, | |
313 { | |
314 .description = | |
315 "one character, conversion, NUL-terminated, trun… | |
316 .input = { "A", SIZE_MAX, 0 }, | |
317 .output = { "", 1 }, | |
318 }, | |
319 { | |
320 .description = "one word, conversion", | |
321 .input = { "wOrD", 4, 10 }, | |
322 .output = { "word", 4 }, | |
323 }, | |
324 { | |
325 .description = "one word, no conversion", | |
326 .input = { "word", 4, 10 }, | |
327 .output = { "word", 4 }, | |
328 }, | |
329 { | |
330 .description = "one word, conversion, truncation", | |
331 .input = { "wOrD", 4, 3 }, | |
332 .output = { "wo", 4 }, | |
333 }, | |
334 { | |
335 .description = "one word, conversion, NUL-terminated", | |
336 .input = { "wOrD", SIZE_MAX, 10 }, | |
337 .output = { "word", 4 }, | |
338 }, | |
339 { | |
340 .description = "one word, no conversion, NUL-terminated", | |
341 .input = { "word", SIZE_MAX, 10 }, | |
342 .output = { "word", 4 }, | |
343 }, | |
344 { | |
345 .description = | |
346 "one word, conversion, NUL-terminated, truncatio… | |
347 .input = { "wOrD", SIZE_MAX, 3 }, | |
348 .output = { "wo", 4 }, | |
349 }, | |
350 }; | |
351 | |
352 static const struct unit_test_to_case_utf8 to_uppercase_utf8[] = { | |
353 { | |
354 .description = "empty input", | |
355 .input = { "", 0, 10 }, | |
356 .output = { "", 0 }, | |
357 }, | |
358 { | |
359 .description = "empty output", | |
360 .input = { "hello", 5, 0 }, | |
361 .output = { "", 5 }, | |
362 }, | |
363 { | |
364 .description = "one character, conversion", | |
365 .input = { "\xC3\x9F", 2, 10 }, | |
366 .output = { "SS", 2 }, | |
367 }, | |
368 { | |
369 .description = "one character, no conversion", | |
370 .input = { "A", 1, 10 }, | |
371 .output = { "A", 1 }, | |
372 }, | |
373 { | |
374 .description = "one character, conversion, truncation", | |
375 .input = { "\xC3\x9F", 2, 0 }, | |
376 .output = { "", 2 }, | |
377 }, | |
378 { | |
379 .description = "one character, conversion, NUL-terminate… | |
380 .input = { "\xC3\x9F", SIZE_MAX, 10 }, | |
381 .output = { "SS", 2 }, | |
382 }, | |
383 { | |
384 .description = "one character, no conversion, NUL-termin… | |
385 .input = { "A", SIZE_MAX, 10 }, | |
386 .output = { "A", 1 }, | |
387 }, | |
388 { | |
389 .description = | |
390 "one character, conversion, NUL-terminated, trun… | |
391 .input = { "\xC3\x9F", SIZE_MAX, 0 }, | |
392 .output = { "", 2 }, | |
393 }, | |
394 { | |
395 .description = "one word, conversion", | |
396 .input = { "gRu" | |
397 "\xC3\x9F" | |
398 "fOrMel", | |
399 11, 15 }, | |
400 .output = { "GRUSSFORMEL", 11 }, | |
401 }, | |
402 { | |
403 .description = "one word, no conversion", | |
404 .input = { "WORD", 4, 10 }, | |
405 .output = { "WORD", 4 }, | |
406 }, | |
407 { | |
408 .description = "one word, conversion, truncation", | |
409 .input = { "gRu" | |
410 "\xC3\x9F" | |
411 "formel", | |
412 11, 5 }, | |
413 .output = { "GRUS", 11 }, | |
414 }, | |
415 { | |
416 .description = "one word, conversion, NUL-terminated", | |
417 .input = { "gRu" | |
418 "\xC3\x9F" | |
419 "formel", | |
420 SIZE_MAX, 15 }, | |
421 .output = { "GRUSSFORMEL", 11 }, | |
422 }, | |
423 { | |
424 .description = "one word, no conversion, NUL-terminated", | |
425 .input = { "WORD", SIZE_MAX, 10 }, | |
426 .output = { "WORD", 4 }, | |
427 }, | |
428 { | |
429 .description = | |
430 "one word, conversion, NUL-terminated, truncatio… | |
431 .input = { "gRu" | |
432 "\xC3\x9F" | |
433 "formel", | |
434 SIZE_MAX, 5 }, | |
435 .output = { "GRUS", 11 }, | |
436 }, | |
437 }; | |
438 | |
439 static const struct unit_test_to_case_utf8 to_titlecase_utf8[] = { | |
440 { | |
441 .description = "empty input", | |
442 .input = { "", 0, 10 }, | |
443 .output = { "", 0 }, | |
444 }, | |
445 { | |
446 .description = "empty output", | |
447 .input = { "hello", 5, 0 }, | |
448 .output = { "", 5 }, | |
449 }, | |
450 { | |
451 .description = "one character, conversion", | |
452 .input = { "a", 1, 10 }, | |
453 .output = { "A", 1 }, | |
454 }, | |
455 { | |
456 .description = "one character, no conversion", | |
457 .input = { "A", 1, 10 }, | |
458 .output = { "A", 1 }, | |
459 }, | |
460 { | |
461 .description = "one character, conversion, truncation", | |
462 .input = { "a", 1, 0 }, | |
463 .output = { "", 1 }, | |
464 }, | |
465 { | |
466 .description = "one character, conversion, NUL-terminate… | |
467 .input = { "a", SIZE_MAX, 10 }, | |
468 .output = { "A", 1 }, | |
469 }, | |
470 { | |
471 .description = "one character, no conversion, NUL-termin… | |
472 .input = { "A", SIZE_MAX, 10 }, | |
473 .output = { "A", 1 }, | |
474 }, | |
475 { | |
476 .description = | |
477 "one character, conversion, NUL-terminated, trun… | |
478 .input = { "a", SIZE_MAX, 0 }, | |
479 .output = { "", 1 }, | |
480 }, | |
481 { | |
482 .description = "one word, conversion", | |
483 .input = { "heLlo", 5, 10 }, | |
484 .output = { "Hello", 5 }, | |
485 }, | |
486 { | |
487 .description = "one word, no conversion", | |
488 .input = { "Hello", 5, 10 }, | |
489 .output = { "Hello", 5 }, | |
490 }, | |
491 { | |
492 .description = "one word, conversion, truncation", | |
493 .input = { "heLlo", 5, 2 }, | |
494 .output = { "H", 5 }, | |
495 }, | |
496 { | |
497 .description = "one word, conversion, NUL-terminated", | |
498 .input = { "heLlo", SIZE_MAX, 10 }, | |
499 .output = { "Hello", 5 }, | |
500 }, | |
501 { | |
502 .description = "one word, no conversion, NUL-terminated", | |
503 .input = { "Hello", SIZE_MAX, 10 }, | |
504 .output = { "Hello", 5 }, | |
505 }, | |
506 { | |
507 .description = | |
508 "one word, conversion, NUL-terminated, truncatio… | |
509 .input = { "heLlo", SIZE_MAX, 3 }, | |
510 .output = { "He", 5 }, | |
511 }, | |
512 { | |
513 .description = "two words, conversion", | |
514 .input = { "heLlo wORLd!", 12, 20 }, | |
515 .output = { "Hello World!", 12 }, | |
516 }, | |
517 { | |
518 .description = "two words, no conversion", | |
519 .input = { "Hello World!", 12, 20 }, | |
520 .output = { "Hello World!", 12 }, | |
521 }, | |
522 { | |
523 .description = "two words, conversion, truncation", | |
524 .input = { "heLlo wORLd!", 12, 8 }, | |
525 .output = { "Hello W", 12 }, | |
526 }, | |
527 { | |
528 .description = "two words, conversion, NUL-terminated", | |
529 .input = { "heLlo wORLd!", SIZE_MAX, 20 }, | |
530 .output = { "Hello World!", 12 }, | |
531 }, | |
532 { | |
533 .description = "two words, no conversion, NUL-terminated… | |
534 .input = { "Hello World!", SIZE_MAX, 20 }, | |
535 .output = { "Hello World!", 12 }, | |
536 }, | |
537 { | |
538 .description = | |
539 "two words, conversion, NUL-terminated, truncati… | |
540 .input = { "heLlo wORLd!", SIZE_MAX, 4 }, | |
541 .output = { "Hel", 12 }, | |
542 }, | |
543 }; | |
544 | |
545 static int | |
546 unit_test_callback_is_case_utf8(const void *t, size_t off, const char *n… | |
547 const char *argv0) | |
548 { | |
549 const struct unit_test_is_case_utf8 *test = | |
550 (const struct unit_test_is_case_utf8 *)t + off; | |
551 bool ret = false; | |
552 size_t caselen = 0x7f; | |
553 | |
554 if (t == is_lowercase_utf8) { | |
555 ret = grapheme_is_lowercase_utf8(test->input.src, | |
556 test->input.srclen, &ca… | |
557 } else if (t == is_uppercase_utf8) { | |
558 ret = grapheme_is_uppercase_utf8(test->input.src, | |
559 test->input.srclen, &ca… | |
560 } else if (t == is_titlecase_utf8) { | |
561 ret = grapheme_is_titlecase_utf8(test->input.src, | |
562 test->input.srclen, &ca… | |
563 | |
564 } else { | |
565 goto err; | |
566 } | |
567 | |
568 /* check results */ | |
569 if (ret != test->output.ret || caselen != test->output.caselen) { | |
570 goto err; | |
571 } | |
572 | |
573 return 0; | |
574 err: | |
575 fprintf(stderr, | |
576 "%s: %s: Failed unit test %zu \"%s\" " | |
577 "(returned (%s, %zu) instead of (%s, %zu)).\n", | |
578 argv0, name, off, test->description, ret ? "true" : "fal… | |
579 caselen, test->output.ret ? "true" : "false", | |
580 test->output.caselen); | |
581 return 1; | |
582 } | |
583 | |
584 static int | |
585 unit_test_callback_to_case_utf8(const void *t, size_t off, const char *n… | |
586 const char *argv0) | |
587 { | |
588 const struct unit_test_to_case_utf8 *test = | |
589 (const struct unit_test_to_case_utf8 *)t + off; | |
590 size_t ret = 0, i; | |
591 char buf[512]; | |
592 | |
593 /* fill the array with canary values */ | |
594 memset(buf, 0x7f, LEN(buf)); | |
595 | |
596 if (t == to_lowercase_utf8) { | |
597 ret = grapheme_to_lowercase_utf8(test->input.src, | |
598 test->input.srclen, buf, | |
599 test->input.destlen); | |
600 } else if (t == to_uppercase_utf8) { | |
601 ret = grapheme_to_uppercase_utf8(test->input.src, | |
602 test->input.srclen, buf, | |
603 test->input.destlen); | |
604 } else if (t == to_titlecase_utf8) { | |
605 ret = grapheme_to_titlecase_utf8(test->input.src, | |
606 test->input.srclen, buf, | |
607 test->input.destlen); | |
608 } else { | |
609 goto err; | |
610 } | |
611 | |
612 /* check results */ | |
613 if (ret != test->output.ret || | |
614 memcmp(buf, test->output.dest, | |
615 MIN(test->input.destlen, test->output.ret))) { | |
616 goto err; | |
617 } | |
618 | |
619 /* check that none of the canary values have been overwritten */ | |
620 for (i = test->input.destlen; i < LEN(buf); i++) { | |
621 if (buf[i] != 0x7f) { | |
622 goto err; | |
623 } | |
624 } | |
625 | |
626 return 0; | |
627 err: | |
628 fprintf(stderr, | |
629 "%s: %s: Failed unit test %zu \"%s\" " | |
630 "(returned (\"%.*s\", %zu) instead of (\"%.*s\", %zu)).\… | |
631 argv0, name, off, test->description, (int)ret, buf, ret, | |
632 (int)test->output.ret, test->output.dest, test->output.r… | |
633 return 1; | |
634 } | |
635 | |
636 int | |
637 main(int argc, char *argv[]) | |
638 { | |
639 (void)argc; | |
640 | |
641 return run_unit_tests(unit_test_callback_is_case_utf8, | |
642 is_lowercase_utf8, LEN(is_lowercase_utf8), | |
643 "grapheme_is_lowercase_utf8", argv[0]) + | |
644 run_unit_tests(unit_test_callback_is_case_utf8, | |
645 is_uppercase_utf8, LEN(is_uppercase_utf8), | |
646 "grapheme_is_uppercase_utf8", argv[0]) + | |
647 run_unit_tests(unit_test_callback_is_case_utf8, | |
648 is_titlecase_utf8, LEN(is_titlecase_utf8), | |
649 "grapheme_is_titlecase_utf8", argv[0]) + | |
650 run_unit_tests(unit_test_callback_to_case_utf8, | |
651 to_lowercase_utf8, LEN(to_lowercase_utf8), | |
652 "grapheme_to_lowercase_utf8", argv[0]) + | |
653 run_unit_tests(unit_test_callback_to_case_utf8, | |
654 to_uppercase_utf8, LEN(to_uppercase_utf8), | |
655 "grapheme_to_uppercase_utf8", argv[0]) + | |
656 run_unit_tests(unit_test_callback_to_case_utf8, | |
657 to_titlecase_utf8, LEN(to_titlecase_utf8), | |
658 "grapheme_to_titlecase_utf8", argv[0]); | |
659 } |