utf8-decode.c - libgrapheme - unicode string library | |
git clone git://git.suckless.org/libgrapheme | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
utf8-decode.c (2129B) | |
--- | |
1 /* See LICENSE file for copyright and license details. */ | |
2 #include <errno.h> | |
3 #include <math.h> | |
4 #include <stdint.h> | |
5 #include <stdio.h> | |
6 #include <stdlib.h> | |
7 #include <string.h> | |
8 | |
9 #include "../gen/character-test.h" | |
10 #include "../grapheme.h" | |
11 #include "util.h" | |
12 | |
13 #include <utf8proc.h> | |
14 | |
15 #define NUM_ITERATIONS 100000 | |
16 | |
/*
 * Input handed to both benchmark callbacks below through their
 * const void *payload parameter.
 */
17 struct utf8_benchmark_payload { | |
/* test buffer walked by libgrapheme(); produced in main() by generate_utf8_test_buffer() */
18 char *buf; | |
/* element-wise copy of buf in utf8proc's byte type, walked by libutf8proc() */
19 utf8proc_uint8_t *buf_utf8proc; | |
/* upper bound for the offsets into both buffers; buf_utf8proc is allocated with this many bytes */
20 size_t buflen; | |
21 }; | |
22 | |
/*
 * Benchmark callback for libgrapheme's UTF-8 decoder.
 *
 * Interprets payload as a const struct utf8_benchmark_payload * and walks
 * p->buf from offset 0, calling grapheme_decode_utf8() at each offset and
 * advancing the offset by the call's return value until it reaches
 * p->buflen.
 *
 * NOTE(review): the two lines holding the length argument and the break
 * condition are cut off ("…") in this listing. The visible part suggests
 * the loop stops early when the returned length is greater than the
 * remaining byte count (p->buflen - off) -- confirm against the upstream
 * file.
 */
23 void | |
24 libgrapheme(const void *payload) | |
25 { | |
26 const struct utf8_benchmark_payload *p = payload; | |
27 uint_least32_t cp; | |
28 size_t ret, off; | |
29 | |
30 for (off = 0; off < p->buflen; off += ret) { | |
31 if ((ret = grapheme_decode_utf8(p->buf + off, p->buflen … | |
32 &cp)) > (p->buflen - off… | |
33 break; | |
34 } | |
/* the decoded code point is deliberately discarded; only the decode call matters here */
35 (void)cp; | |
36 } | |
37 } | |
38 | |
/*
 * Benchmark callback for utf8proc's UTF-8 decoder.
 *
 * Interprets payload as a const struct utf8_benchmark_payload * and walks
 * p->buf_utf8proc from offset 0, calling utf8proc_iterate() at each offset.
 * The loop stops at the first negative return value; otherwise the return
 * value is cast to size_t and added to the offset until it reaches
 * p->buflen.
 *
 * NOTE(review): the length argument passed to utf8proc_iterate() is cut off
 * ("…") in this listing; it starts with a cast of an expression on
 * p->buflen to utf8proc_ssize_t -- confirm the full expression against the
 * upstream file.
 */
39 void | |
40 libutf8proc(const void *payload) | |
41 { | |
42 const struct utf8_benchmark_payload *p = payload; | |
43 utf8proc_int32_t cp; | |
44 utf8proc_ssize_t ret; | |
45 size_t off; | |
46 | |
47 for (off = 0; off < p->buflen; off += (size_t)ret) { | |
48 if ((ret = utf8proc_iterate(p->buf_utf8proc + off, | |
49 (utf8proc_ssize_t)(p->buflen… | |
50 &cp)) < 0) { | |
51 break; | |
52 } | |
/* the decoded code point is deliberately discarded; only the decode call matters here */
53 (void)cp; | |
54 } | |
55 } | |
56 | |
/*
 * Benchmark driver: builds the test input, runs the libgrapheme and
 * utf8proc decoding callbacks through run_benchmark(), and releases the
 * buffers.
 *
 * argc is unused; argv[0] is printed as a heading before the results.
 * Exits with status 1 if allocating the utf8proc copy of the buffer fails;
 * otherwise returns 0.
 *
 * NOTE(review): several call lines are cut off ("…") in this listing, so
 * the trailing arguments of generate_utf8_test_buffer() and of both
 * run_benchmark() calls are not fully visible -- confirm against the
 * upstream file.
 */
57 int | |
58 main(int argc, char *argv[]) | |
59 { | |
60 struct utf8_benchmark_payload p; | |
61 size_t i; | |
/* starts as NAN; its address is passed to run_benchmark() below (presumably filled by the first run -- TODO confirm in util) */
62 double baseline = (double)NAN; | |
63 | |
64 (void)argc; | |
65 | |
/*
 * NOTE(review): the return value is used below without a check here;
 * whether generate_utf8_test_buffer() can return NULL is not visible
 * in this file -- confirm. The cut-off last argument presumably
 * receives the buffer length into p.buflen -- confirm.
 */
66 p.buf = generate_utf8_test_buffer( | |
67 character_break_test, LEN(character_break_test), &(p.buf… | |
68 | |
69 /* convert cp-buffer to stupid custom libutf8proc-uint8-type */ | |
70 if ((p.buf_utf8proc = malloc(p.buflen)) == NULL) { | |
71 fprintf(stderr, "malloc: %s\n", strerror(errno)); | |
72 exit(1); | |
73 } | |
74 for (i = 0; i < p.buflen; i++) { | |
75 /* | |
76 * even if char is larger than 8 bit, it will only have | |
77 * any of the first 8 bits set (by construction). | |
78 */ | |
79 p.buf_utf8proc[i] = (utf8proc_uint8_t)p.buf[i]; | |
80 } | |
81 | |
82 printf("%s\n", argv[0]); | |
/* both runs receive the same payload, NUM_ITERATIONS and p.buflen */
83 run_benchmark(libgrapheme, &p, "libgrapheme ", NULL, "byte", &ba… | |
84 NUM_ITERATIONS, p.buflen); | |
85 run_benchmark(libutf8proc, &p, "libutf8proc ", | |
86 "but unsafe (does not detect overlong encodings)",… | |
87 &baseline, NUM_ITERATIONS, p.buflen); | |
88 | |
89 free(p.buf); | |
90 free(p.buf_utf8proc); | |
91 | |
92 return 0; | |
93 } | |