next_break.sh - libgrapheme - unicode string library | |
git clone git://git.suckless.org/libgrapheme | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
next_break.sh (3397B) | |
--- | |
# Derive the per-encoding wording and name suffixes from $ENCODING.
#   UNIT       - unit in which offsets are described ("byte" or "codepoint")
#   SUFFIX     - appended to the documented function name ("_utf8" or "")
#   ANTISUFFIX - suffix of the other variant, used in the .Xr
#                cross-references further down
# Any value other than "utf8" selects the codepoint variant.
1 if [ "$ENCODING" = "utf8" ]; then | |
2 UNIT="byte" | |
3 SUFFIX="_utf8" | |
4 ANTISUFFIX="" | |
5 else | |
6 UNIT="codepoint" | |
7 SUFFIX="" | |
8 ANTISUFFIX="_utf8" | |
9 fi | |
10 | |
# Write the man page header plus the NAME, SYNOPSIS, DESCRIPTION and
# RETURN VALUES sections in mdoc markup to standard output.
# The heredoc delimiter is unquoted, so every ${...} and $(...) below is
# expanded by the shell while the text is emitted.
# MAN_DATE, TYPE and REALTYPE are not assigned anywhere in this listing;
# presumably the caller provides them -- TODO confirm.
# NOTE(review): on the .Dt line the character class given to tr looks
# unquoted; if the cut-off remainder is unquoted as well, the shell may
# glob-expand it against files in the working directory -- confirm
# against the full line.
# NOTE(review): lines ending in an ellipsis are cut off in this listing;
# the description above covers only the visible part.
11 cat << EOF | |
12 .Dd ${MAN_DATE} | |
13 .Dt GRAPHEME_NEXT_$(printf "%s_break%s" "$TYPE" "$SUFFIX" | tr [:lower:]… | |
14 .Os suckless.org | |
15 .Sh NAME | |
16 .Nm grapheme_next_${TYPE}_break${SUFFIX} | |
17 .Nd determine ${UNIT}-offset to next ${REALTYPE} break | |
18 .Sh SYNOPSIS | |
19 .In grapheme.h | |
20 .Ft size_t | |
21 .Fn grapheme_next_${TYPE}_break${SUFFIX} "const $(if [ "$ENCODING" = "ut… | |
22 .Sh DESCRIPTION | |
23 The | |
24 .Fn grapheme_next_${TYPE}_break${SUFFIX} | |
25 function computes the offset (in ${UNIT}s) to the next ${REALTYPE} | |
26 break (see | |
27 .Xr libgrapheme 7 ) | |
28 in the $(if [ "$ENCODING" = "utf8" ]; then printf "UTF-8-encoded string"… | |
29 .Va str | |
30 of length | |
31 .Va len .$(if [ "$TYPE" != "line" ]; then printf "\nIf a ${REALTYPE} beg… | |
32 .Va str | |
33 this offset is equal to the length of said ${REALTYPE}."; fi) | |
34 .Pp | |
35 If | |
36 .Va len | |
37 is set to | |
38 .Dv SIZE_MAX | |
39 (stdint.h is already included by grapheme.h) the string | |
40 .Va str | |
41 is interpreted to be NUL-terminated and processing stops when | |
42 a $(if [ "$ENCODING" = "utf8" ]; then printf "NUL-byte"; else printf "co… | |
43 .Pp | |
44 For $(if [ "$ENCODING" != "utf8" ]; then printf "UTF-8-encoded"; else pr… | |
45 data$(if [ "$TYPE" = "character" ] && [ "$ENCODING" = "utf8" ]; then pri… | |
46 .Xr grapheme_next_${TYPE}_break${ANTISUFFIX} 3 | |
47 can be used instead. | |
48 .Sh RETURN VALUES | |
49 The | |
50 .Fn grapheme_next_${TYPE}_break${SUFFIX} | |
51 function returns the offset (in ${UNIT}s) to the next ${REALTYPE} | |
52 break in | |
53 .Va str | |
54 or 0 if | |
55 .Va str | |
56 is | |
57 .Dv NULL . | |
58 EOF | |
59 | |
# Only the UTF-8 variant gets an EXAMPLES section: a C program inside a
# .Bd -literal display that walks the string s twice with
# grapheme_next_${TYPE}_break_utf8 -- first until the NUL byte (length
# argument SIZE_MAX), then with len set to 17 -- printing the byte count
# and the text of each segment.
# The heredoc delimiter is unquoted, so ${TYPE} and ${REALTYPE} are
# substituted into the C source as it is written out.
# NOTE(review): lines ending in an ellipsis are cut off in this listing.
60 if [ "$ENCODING" = "utf8" ]; then | |
61 cat << EOF | |
62 .Sh EXAMPLES | |
63 .Bd -literal | |
64 /* cc (-static) -o example example.c -lgrapheme */ | |
65 #include <grapheme.h> | |
66 #include <stdint.h> | |
67 #include <stdio.h> | |
68 | |
69 int | |
70 main(void) | |
71 { | |
72 /* UTF-8 encoded input */ | |
73 char *s = "T\\\\xC3\\\\xABst \\\\xF0\\\\x9F\\\\x91\\\\xA8\\\\xE2… | |
74 "\\\\x9F\\\\x91\\\\xA9\\\\xE2\\\\x80\\\\x8D\\\\xF0\\\\… | |
75 "\\\\x9F\\\\x87\\\\xBA\\\\xF0\\\\x9F\\\\x87\\\\xB8 \\\… | |
76 "\\\\xA5\\\\x80 \\\\xE0\\\\xAE\\\\xA8\\\\xE0\\\\xAE\\\… | |
77 size_t ret, len, off; | |
78 | |
79 printf("Input: \\\\"%s\\\\"\\\\n", s); | |
80 | |
81 /* print each ${REALTYPE} with byte-length */ | |
82 printf("${REALTYPE}s in NUL-delimited input:\\\\n"); | |
83 for (off = 0; s[off] != '\\\\0'; off += ret) { | |
84 ret = grapheme_next_${TYPE}_break_utf8(s + off, SIZE_MAX… | |
85 printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off); | |
86 } | |
87 printf("\\\\n"); | |
88 | |
89 /* do the same, but this time string is length-delimited */ | |
90 len = 17; | |
91 printf("${REALTYPE}s in input delimited to %zu bytes:\\\\n", len… | |
92 for (off = 0; off < len; off += ret) { | |
93 ret = grapheme_next_${TYPE}_break_utf8(s + off, len - of… | |
94 printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off); | |
95 } | |
96 | |
97 return 0; | |
98 } | |
99 .Ed | |
100 EOF | |
101 fi | |
102 | |
# Write the closing SEE ALSO, STANDARDS and AUTHORS sections.
# The SEE ALSO line carries an extra insertion for TYPE "character" with a
# non-utf8 ENCODING; what it inserts is cut off in this listing.
# The cross-reference uses ANTISUFFIX, i.e. it points at the other
# encoding variant of the documented function.
# UNICODE_VERSION is not assigned in this listing; presumably supplied by
# the caller -- TODO confirm.
103 cat << EOF | |
104 .Sh SEE ALSO$(if [ "$TYPE" = "character" ] && [ "$ENCODING" != "utf8" ];… | |
105 .Xr grapheme_next_${TYPE}_break${ANTISUFFIX} 3 , | |
106 .Xr libgrapheme 7 | |
107 .Sh STANDARDS | |
108 .Fn grapheme_next_${TYPE}_break${SUFFIX} | |
109 is compliant with the Unicode ${UNICODE_VERSION} specification. | |
110 .Sh AUTHORS | |
111 .An Laslo Hunhold Aq Mt [email protected] | |
112 EOF | |