| utf8-decode.c - libgrapheme - unicode string library | |
| git clone git://git.suckless.org/libgrapheme | |
| Log | |
| Files | |
| Refs | |
| README | |
| LICENSE | |
| --- | |
| utf8-decode.c (2129B) | |
| --- | |
| 1 /* See LICENSE file for copyright and license details. */ | |
| 2 #include <errno.h> | |
| 3 #include <math.h> | |
| 4 #include <stdint.h> | |
| 5 #include <stdio.h> | |
| 6 #include <stdlib.h> | |
| 7 #include <string.h> | |
| 8 | |
| 9 #include "../gen/character-test.h" | |
| 10 #include "../grapheme.h" | |
| 11 #include "util.h" | |
| 12 | |
| 13 #include <utf8proc.h> | |
| 14 | |
| 15 #define NUM_ITERATIONS 100000 | |
| 16 /* Input shared by both benchmark functions: one byte sequence stored twice, once per library's element type. */ | |
| 17 struct utf8_benchmark_payload { | |
| 18 char *buf; /* bytes passed to grapheme_decode_utf8() */ | |
| 19 utf8proc_uint8_t *buf_utf8proc; /* element-wise copy of buf, passed to utf8proc_iterate() */ | |
| 20 size_t buflen; /* element count used as the loop bound for both buffers */ | |
| 21 }; | |
| 22 /* Benchmark body for libgrapheme: steps through p->buf from offset 0, calling grapheme_decode_utf8() and advancing by its return value. payload must point to a struct utf8_benchmark_payload. Returns nothing. */ | |
| 23 void | |
| 24 libgrapheme(const void *payload) | |
| 25 { | |
| 26 const struct utf8_benchmark_payload *p = payload; | |
| 27 uint_least32_t cp; | |
| 28 size_t ret, off; | |
| 29 | |
| 30 for (off = 0; off < p->buflen; off += ret) { | |
| 31 if ((ret = grapheme_decode_utf8(p->buf + off, p->buflen … | |
| 32 &cp)) > (p->buflen - off… | |
| 33 break; /* NOTE(review): rows 31-32 are cut off in this listing; the visible part compares the return value against an expression starting with p->buflen - off -- confirm against the real file */ | |
| 34 } | |
| 35 (void)cp; /* the decoded code point is deliberately discarded */ | |
| 36 } | |
| 37 } | |
| 38 /* Benchmark body for utf8proc: steps through p->buf_utf8proc from offset 0, calling utf8proc_iterate() and advancing by its return value. payload must point to a struct utf8_benchmark_payload. Returns nothing. */ | |
| 39 void | |
| 40 libutf8proc(const void *payload) | |
| 41 { | |
| 42 const struct utf8_benchmark_payload *p = payload; | |
| 43 utf8proc_int32_t cp; | |
| 44 utf8proc_ssize_t ret; | |
| 45 size_t off; | |
| 46 | |
| 47 for (off = 0; off < p->buflen; off += (size_t)ret) { | |
| 48 if ((ret = utf8proc_iterate(p->buf_utf8proc + off, | |
| 49 (utf8proc_ssize_t)(p->buflen… | |
| 50 &cp)) < 0) { | |
| 51 break; /* a negative return value ends the walk, so the (size_t)ret cast in the loop header only ever sees ret >= 0 */ | |
| 52 } | |
| 53 (void)cp; /* the decoded code point is deliberately discarded */ | |
| 54 } | |
| 55 } | |
| 56 /* Entry point: obtains a test buffer, mirrors it into a utf8proc_uint8_t array, runs both benchmark functions over it, frees both buffers and returns 0. Exits with status 1 if malloc() fails. */ | |
| 57 int | |
| 58 main(int argc, char *argv[]) | |
| 59 { | |
| 60 struct utf8_benchmark_payload p; | |
| 61 size_t i; | |
| 62 double baseline = (double)NAN; /* starts as NaN; &baseline is handed to run_benchmark() below */ | |
| 63 | |
| 64 (void)argc; /* argc is unused */ | |
| 65 | |
| 66 p.buf = generate_utf8_test_buffer( | |
| 67 character_break_test, LEN(character_break_test), &(p.buf… | |
| 68 /* NOTE(review): p.buf is not checked against NULL here; confirm that generate_utf8_test_buffer() cannot return NULL. Row 67 is cut off in this listing. */ | |
| 69 /* convert cp-buffer to stupid custom libutf8proc-uint8-type */ | |
| 70 if ((p.buf_utf8proc = malloc(p.buflen)) == NULL) { | |
| 71 fprintf(stderr, "malloc: %s\n", strerror(errno)); | |
| 72 exit(1); | |
| 73 } | |
| 74 for (i = 0; i < p.buflen; i++) { | |
| 75 /* | |
| 76 * even if char is larger than 8 bit, it will only have | |
| 77 * any of the first 8 bits set (by construction). | |
| 78 */ | |
| 79 p.buf_utf8proc[i] = (utf8proc_uint8_t)p.buf[i]; | |
| 80 } | |
| 81 /* print the program name as a heading, then run both benchmark functions with the same payload, NUM_ITERATIONS and p.buflen */ | |
| 82 printf("%s\n", argv[0]); | |
| 83 run_benchmark(libgrapheme, &p, "libgrapheme ", NULL, "byte", &ba… | |
| 84 NUM_ITERATIONS, p.buflen); | |
| 85 run_benchmark(libutf8proc, &p, "libutf8proc ", | |
| 86 "but unsafe (does not detect overlong encodings)",… | |
| 87 &baseline, NUM_ITERATIONS, p.buflen); | |
| 88 /* NOTE(review): p.buf is released with free(), so it is presumably allocated with the malloc family inside generate_utf8_test_buffer() -- confirm */ | |
| 89 free(p.buf); | |
| 90 free(p.buf_utf8proc); | |
| 91 | |
| 92 return 0; | |
| 93 } |