| grapheme_decode_utf8.sh - libgrapheme - unicode string library | |
| git clone git://git.suckless.org/libgrapheme | |
| Log | |
| Files | |
| Refs | |
| README | |
| LICENSE | |
| --- | |
| grapheme_decode_utf8.sh (2322B) | |
| --- | |
| 1 cat << EOF | |
| 2 .Dd ${MAN_DATE} | |
| 3 .Dt GRAPHEME_DECODE_UTF8 3 | |
| 4 .Os suckless.org | |
| 5 .Sh NAME | |
| 6 .Nm grapheme_decode_utf8 | |
| 7 .Nd decode first codepoint in UTF-8-encoded string | |
| 8 .Sh SYNOPSIS | |
| 9 .In grapheme.h | |
| 10 .Ft size_t | |
| 11 .Fn grapheme_decode_utf8 "const char *str" "size_t len" "uint_least32_t *cp" | |
| 12 .Sh DESCRIPTION | |
| 13 The | |
| 14 .Fn grapheme_decode_utf8 | |
| 15 function decodes the first codepoint in the UTF-8-encoded string | |
| 16 .Va str | |
| 17 of length | |
| 18 .Va len . | |
| 19 If the UTF-8 sequence is invalid (overlong encoding, unexpected byte, | |
| 20 string ends unexpectedly, empty string, etc.) the decoding is stopped | |
| 21 at the last processed byte and the decoded codepoint set to | |
| 22 .Dv GRAPHEME_INVALID_CODEPOINT . | |
| 23 .Pp | |
| 24 If | |
| 25 .Va cp | |
| 26 is not | |
| 27 .Dv NULL | |
| 28 the decoded codepoint is stored in the memory pointed to by | |
| 29 .Va cp . | |
| 30 .Pp | |
| 31 Given that NUL has a unique 1-byte representation, it is safe to operate on | |
| 32 NUL-terminated strings by setting | |
| 33 .Va len | |
| 34 to | |
| 35 .Dv SIZE_MAX | |
| 36 (stdint.h is already included by grapheme.h) and terminating when | |
| 37 .Va cp | |
| 38 is 0 (see | |
| 39 .Sx EXAMPLES | |
| 40 for an example). | |
| 41 .Sh RETURN VALUES | |
| 42 The | |
| 43 .Fn grapheme_decode_utf8 | |
| 44 function returns the number of processed bytes, or 0 if | |
| 45 .Va str | |
| 46 is | |
| 47 .Dv NULL | |
| 48 or | |
| 49 .Va len | |
| 50 is 0. | |
| 51 If the string ends unexpectedly in a multibyte sequence, the desired | |
| 52 length (that is larger than | |
| 53 .Va len ) | |
| 54 is returned. | |
| 55 .Sh EXAMPLES | |
| 56 .Bd -literal | |
| 57 /* cc (-static) -o example example.c -lgrapheme */ | |
| 58 #include <grapheme.h> | |
| 59 #include <inttypes.h> | |
| 60 #include <stdio.h> | |
| 61 | |
| 62 void | |
| 63 print_cps(const char *str, size_t len) | |
| 64 { | |
| 65 size_t ret, off; | |
| 66 uint_least32_t cp; | |
| 67 | |
| 68 for (off = 0; off < len; off += ret) { | |
| 69 if ((ret = grapheme_decode_utf8(str + off, | |
| 70 len - off, &cp)) > (len - off)) { | |
| 71 /* | |
| 72 * string ended unexpectedly in the middle of a | |
| 73 * multibyte sequence and we have the choice | |
| 74 * here to possibly expand str by ret - len + off | |
| 75 * bytes to get a full sequence, but we just | |
| 76 * bail out in this case. | |
| 77 */ | |
| 78 break; | |
| 79 } | |
| 80 printf("%"PRIxLEAST32"\\\\n", cp); | |
| 81 } | |
| 82 } | |
| 83 | |
| 84 void | |
| 85 print_cps_nul_terminated(const char *str) | |
| 86 { | |
| 87 size_t ret, off; | |
| 88 uint_least32_t cp; | |
| 89 | |
| 90 for (off = 0; (ret = grapheme_decode_utf8(str + off, | |
| 91 SIZE_MAX, &cp)) > 0 && | |
| 92 cp != 0; off += ret) { | |
| 93 printf("%"PRIxLEAST32"\\\\n", cp); | |
| 94 } | |
| 95 } | |
| 96 .Ed | |
| 97 .Sh SEE ALSO | |
| 98 .Xr grapheme_encode_utf8 3 , | |
| 99 .Xr libgrapheme 7 | |
| 100 .Sh AUTHORS | |
| 101 .An Laslo Hunhold Aq Mt [email protected] | |
| 102 EOF |