grapheme_decode_utf8.sh - libgrapheme - unicode string library | |
git clone git://git.suckless.org/libgrapheme | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
grapheme_decode_utf8.sh (2322B) | |
--- | |
1 cat << EOF | |
2 .Dd ${MAN_DATE} | |
3 .Dt GRAPHEME_DECODE_UTF8 3 | |
4 .Os suckless.org | |
5 .Sh NAME | |
6 .Nm grapheme_decode_utf8 | |
7 .Nd decode first codepoint in UTF-8-encoded string | |
8 .Sh SYNOPSIS | |
9 .In grapheme.h | |
10 .Ft size_t | |
11 .Fn grapheme_decode_utf8 "const char *str" "size_t len" "uint_least32_t *cp" | |
12 .Sh DESCRIPTION | |
13 The | |
14 .Fn grapheme_decode_utf8 | |
15 function decodes the first codepoint in the UTF-8-encoded string | |
16 .Va str | |
17 of length | |
18 .Va len . | |
19 If the UTF-8 sequence is invalid (overlong encoding, unexpected byte, | |
20 string ends unexpectedly, empty string, etc.) the decoding is stopped | |
21 at the last processed byte and the decoded codepoint is set to | |
22 .Dv GRAPHEME_INVALID_CODEPOINT . | |
23 .Pp | |
24 If | |
25 .Va cp | |
26 is not | |
27 .Dv NULL | |
28 the decoded codepoint is stored in the memory pointed to by | |
29 .Va cp . | |
30 .Pp | |
31 Given NUL has a unique one-byte representation, it is safe to operate on | |
32 NUL-terminated strings by setting | |
33 .Va len | |
34 to | |
35 .Dv SIZE_MAX | |
36 (stdint.h is already included by grapheme.h) and terminating when | |
37 .Va cp | |
38 is 0 (see | |
39 .Sx EXAMPLES | |
40 for an example). | |
41 .Sh RETURN VALUES | |
42 The | |
43 .Fn grapheme_decode_utf8 | |
44 function returns the number of processed bytes, or 0 if | |
45 .Va str | |
46 is | |
47 .Dv NULL | |
48 or | |
49 .Va len | |
50 is 0. | |
51 If the string ends unexpectedly in a multibyte sequence, the desired | |
52 length (that is larger than | |
53 .Va len ) | |
54 is returned. | |
55 .Sh EXAMPLES | |
56 .Bd -literal | |
57 /* cc (-static) -o example example.c -lgrapheme */ | |
58 #include <grapheme.h> | |
59 #include <inttypes.h> | |
60 #include <stdio.h> | |
61 | |
62 void | |
63 print_cps(const char *str, size_t len) | |
64 { | |
65 size_t ret, off; | |
66 uint_least32_t cp; | |
67 | |
68 for (off = 0; off < len; off += ret) { | |
69 if ((ret = grapheme_decode_utf8(str + off, | |
70 len - off, &cp)) > (len - off)) { | |
71 /* | |
72 * string ended unexpectedly in the middle of a | |
73 * multibyte sequence and we have the choice | |
74 * here to possibly expand str by ret - len + off | |
75 * bytes to get a full sequence, but we just | |
76 * bail out in this case. | |
77 */ | |
78 break; | |
79 } | |
80 printf("%"PRIxLEAST32"\\\\n", cp); | |
81 } | |
82 } | |
83 | |
84 void | |
85 print_cps_nul_terminated(const char *str) | |
86 { | |
87 size_t ret, off; | |
88 uint_least32_t cp; | |
89 | |
90 for (off = 0; (ret = grapheme_decode_utf8(str + off, | |
91 SIZE_MAX, &cp)) > 0 && | |
92 cp != 0; off += ret) { | |
93 printf("%"PRIxLEAST32"\\\\n", cp); | |
94 } | |
95 } | |
96 .Ed | |
97 .Sh SEE ALSO | |
98 .Xr grapheme_encode_utf8 3 , | |
99 .Xr libgrapheme 7 | |
100 .Sh AUTHORS | |
101 .An Laslo Hunhold Aq Mt dev@frign.de | |
102 EOF |