Introduction
Introduction Statistics Contact Development Disclaimer Help
util.c - libgrapheme - unicode string library
git clone git://git.suckless.org/libgrapheme
Log
Files
Refs
README
LICENSE
---
util.c (11480B)
---
1 /* See LICENSE file for copyright and license details. */
2 #include <limits.h>
3 #include <stdbool.h>
4 #include <stddef.h>
5 #include <stdint.h>
6
7 #include "../gen/types.h"
8 #include "../grapheme.h"
9 #include "util.h"
10
11 void
12 herodotus_reader_init(HERODOTUS_READER *r, enum herodotus_type type,
13 const void *src, size_t srclen)
14 {
15 size_t i;
16
17 r->type = type;
18 r->src = src;
19 r->srclen = srclen;
20 r->off = 0;
21 r->terminated_by_null = false;
22
23 for (i = 0; i < LEN(r->soft_limit); i++) {
24 r->soft_limit[i] = SIZE_MAX;
25 }
26 }
27
/*
 * Derive the reader dest from the reader src so that dest starts
 * afresh (dest->off == 0) while type and terminated_by_null are
 * inherited from src.
 *
 * A NULL source pointer stays NULL; otherwise the pointer is cast to
 * the element type matching src->type and an offset is added to it.
 * A source length of SIZE_MAX is carried over unchanged. Each
 * soft-limit equal to SIZE_MAX is carried over unchanged as well;
 * every other soft-limit is reduced by src->off and clamped to zero
 * when src->off is not below it.
 *
 * NOTE(review): the end of the signature, the pointer offsets and the
 * srclen ternary are cut off in this listing; presumably the offset is
 * src->off and the srclen fallback is 0 -- confirm against the
 * original file.
 */
28 void
29 herodotus_reader_copy(const HERODOTUS_READER *src, HERODOTUS_READER *des…
30 {
31 size_t i;
32
33 /*
34 * we copy such that we have a "fresh" start and build on the
35 * fact that src->soft_limit[i] for any i and src->srclen are
36 * always larger or equal to src->off
37 */
38 dest->type = src->type;
39 if (src->type == HERODOTUS_TYPE_CODEPOINT) {
40 dest->src =
41 (src->src == NULL) ?
42 NULL :
43 ((const uint_least32_t *)(src->src)) + s…
44 } else { /* src->type == HERODOTUS_TYPE_UTF8 */
45 dest->src = (src->src == NULL) ?
46 NULL :
47 ((const char *)(src->src)) + src->of…
48 }
/* SIZE_MAX as a length is kept as is instead of being shortened */
49 if (src->srclen == SIZE_MAX) {
50 dest->srclen = SIZE_MAX;
51 } else {
52 dest->srclen =
53 (src->off < src->srclen) ? src->srclen - src->of…
54 }
55 dest->off = 0;
56 dest->terminated_by_null = src->terminated_by_null;
57
58 for (i = 0; i < LEN(src->soft_limit); i++) {
59 if (src->soft_limit[i] == SIZE_MAX) {
60 dest->soft_limit[i] = SIZE_MAX;
61 } else {
62 /*
63 * if we have a degenerate case where the offset is
64 * higher than the soft-limit, we simply clamp the
65 * soft-limit to zero given we can't decide here
66 * to release the limit and, instead, we just
67 * prevent any more reads
68 */
69 dest->soft_limit[i] =
70 (src->off < src->soft_limit[i]) ?
71 src->soft_limit[i] - src->off :
72 0;
73 }
74 }
75 }
76
77 void
78 herodotus_reader_push_advance_limit(HERODOTUS_READER *r, size_t count)
79 {
80 size_t i;
81
82 for (i = LEN(r->soft_limit) - 1; i >= 1; i--) {
83 r->soft_limit[i] = r->soft_limit[i - 1];
84 }
85 r->soft_limit[0] = r->off + count;
86 }
87
88 void
89 herodotus_reader_pop_limit(HERODOTUS_READER *r)
90 {
91 size_t i;
92
93 for (i = 0; i < LEN(r->soft_limit) - 1; i++) {
94 r->soft_limit[i] = r->soft_limit[i + 1];
95 }
96 r->soft_limit[LEN(r->soft_limit) - 1] = SIZE_MAX;
97 }
98
99 size_t
100 herodotus_reader_next_word_break(const HERODOTUS_READER *r)
101 {
102 if (r->type == HERODOTUS_TYPE_CODEPOINT) {
103 return grapheme_next_word_break(
104 (const uint_least32_t *)(r->src) + r->off,
105 MIN(r->srclen, r->soft_limit[0]) - r->off);
106 } else { /* r->type == HERODOTUS_TYPE_UTF8 */
107 return grapheme_next_word_break_utf8(
108 (const char *)(r->src) + r->off,
109 MIN(r->srclen, r->soft_limit[0]) - r->off);
110 }
111 }
112
/*
 * Report the size of the next codepoint at the reader's current
 * offset without changing the reader.
 *
 * For a codepoint reader the result is 1 when the offset is below the
 * smaller of srclen and soft_limit[0]. For a UTF-8 reader the result
 * is whatever grapheme_decode_utf8() returns for the window between
 * the offset and that same bound; NULL is passed as the output
 * pointer, so no codepoint value is stored.
 *
 * NOTE(review): the alternative value of the ternary is cut off in
 * this listing (presumably 0) -- confirm against the original file.
 */
113 size_t
114 herodotus_reader_next_codepoint_break(const HERODOTUS_READER *r)
115 {
116 if (r->type == HERODOTUS_TYPE_CODEPOINT) {
117 return (r->off < MIN(r->srclen, r->soft_limit[0])) ? 1 :…
118 } else { /* r->type == HERODOTUS_TYPE_UTF8 */
119 return grapheme_decode_utf8(
120 (const char *)(r->src) + r->off,
121 MIN(r->srclen, r->soft_limit[0]) - r->off, NULL);
122 }
123 }
124
125 size_t
126 herodotus_reader_number_read(const HERODOTUS_READER *r)
127 {
128 return r->off;
129 }
130
/*
 * Read the codepoint at the reader's current offset into *cp.
 *
 * Return values:
 *  - HERODOTUS_STATUS_END_OF_BUFFER with *cp set to
 *    GRAPHEME_INVALID_CODEPOINT when the reader was already marked as
 *    terminated by a NUL, when the offset has reached srclen, or when
 *    a further condition on r->src holds.
 *  - HERODOTUS_STATUS_SOFT_LIMIT_REACHED with *cp set to
 *    GRAPHEME_INVALID_CODEPOINT when the offset has reached
 *    soft_limit[0].
 *  - HERODOTUS_STATUS_END_OF_BUFFER when srclen is SIZE_MAX and the
 *    codepoint read is 0 (terminated_by_null is set), or when the
 *    codepoint would extend past the smaller of srclen and
 *    soft_limit[0]. In both cases *cp keeps the value just read and
 *    the offset is left untouched.
 *  - HERODOTUS_STATUS_SUCCESS otherwise. The offset is advanced by the
 *    size of the codepoint only if advance is true.
 *
 * NOTE(review): the end of the signature and the end of the first
 * condition are cut off in this listing (presumably the cp parameter
 * and a NULL check on r->src) -- confirm against the original file.
 */
131 enum herodotus_status
132 herodotus_read_codepoint(HERODOTUS_READER *r, bool advance, uint_least32…
133 {
134 size_t ret;
135
136 if (r->terminated_by_null || r->off >= r->srclen || r->src == NU…
137 *cp = GRAPHEME_INVALID_CODEPOINT;
138 return HERODOTUS_STATUS_END_OF_BUFFER;
139 }
140
141 if (r->off >= r->soft_limit[0]) {
142 *cp = GRAPHEME_INVALID_CODEPOINT;
143 return HERODOTUS_STATUS_SOFT_LIMIT_REACHED;
144 }
145
/* ret is the number of source elements the codepoint occupies */
146 if (r->type == HERODOTUS_TYPE_CODEPOINT) {
147 *cp = ((const uint_least32_t *)(r->src))[r->off];
148 ret = 1;
149 } else { /* r->type == HERODOTUS_TYPE_UTF8 */
150 ret = grapheme_decode_utf8(
151 (const char *)r->src + r->off,
152 MIN(r->srclen, r->soft_limit[0]) - r->off, cp);
153 }
154
155 if (unlikely(r->srclen == SIZE_MAX && *cp == 0)) {
156 /*
157 * We encountered a null-codepoint. Don't increment
158 * offset and return as if the buffer had ended here all
159 * along
160 */
161 r->terminated_by_null = true;
162 return HERODOTUS_STATUS_END_OF_BUFFER;
163 }
164
165 if (r->off + ret > MIN(r->srclen, r->soft_limit[0])) {
166 /*
167 * we want more than we have; instead of returning
168 * garbage we terminate here.
169 */
170 return HERODOTUS_STATUS_END_OF_BUFFER;
171 }
172
173 /*
174 * Increase offset which we now know won't surpass the limits,
175 * unless we got told otherwise
176 */
177 if (advance) {
178 r->off += ret;
179 }
180
181 return HERODOTUS_STATUS_SUCCESS;
182 }
183
/*
 * Set up the writer w from scratch: remember the element type, the
 * destination pointer and its length, start at offset zero and set
 * first_unwritable_offset to SIZE_MAX (the value
 * herodotus_write_codepoint() checks for before noting down an
 * offset).
 *
 * NOTE(review): the third parameter is cut off in this listing; the
 * body assigns a variable named dest, so it is presumably the
 * destination pointer -- confirm against the original file.
 */
184 void
185 herodotus_writer_init(HERODOTUS_WRITER *w, enum herodotus_type type, voi…
186 size_t destlen)
187 {
188 w->type = type;
189 w->dest = dest;
190 w->destlen = destlen;
191 w->off = 0;
192 w->first_unwritable_offset = SIZE_MAX;
193 }
194
/*
 * Terminate the writer's destination with a zero element without
 * changing w->off. Nothing happens when w->dest is NULL or when none
 * of the three cases below applies (which includes destlen == 0).
 *
 * The position written to is chosen in this order:
 *  1. w->off, if it is still inside the buffer;
 *  2. w->first_unwritable_offset, if that is inside the buffer;
 *  3. the last element (destlen - 1), if the buffer is not empty.
 *
 * NOTE(review): the right-hand sides of the assignments in case 2 and
 * of the codepoint assignment in case 3 are cut off in this listing
 * (presumably 0 / '\0' like the visible ones) -- confirm against the
 * original file.
 */
195 void
196 herodotus_writer_nul_terminate(HERODOTUS_WRITER *w)
197 {
198 if (w->dest == NULL) {
199 return;
200 }
201
202 if (w->off < w->destlen) {
203 /* We still have space in the buffer. Simply use it */
204 if (w->type == HERODOTUS_TYPE_CODEPOINT) {
205 ((uint_least32_t *)(w->dest))[w->off] = 0;
206 } else { /* w->type == HERODOTUS_TYPE_UTF8 */
207 ((char *)(w->dest))[w->off] = '\0';
208 }
209 } else if (w->first_unwritable_offset < w->destlen) {
210 /*
211 * There is no more space in the buffer. However,
212 * we have noted down the first offset we couldn't
213 * use to write into the buffer and it's smaller than
214 * destlen. Thus we bailed writing into the
215 * destination when a multibyte-codepoint couldn't be
216 * written. So the last "real" byte might be at
217 * destlen-4, destlen-3, destlen-2 or destlen-1
218 * (the last case meaning truncation).
219 */
220 if (w->type == HERODOTUS_TYPE_CODEPOINT) {
221 ((uint_least32_t
222 *)(w->dest))[w->first_unwritable_offse…
223 } else { /* w->type == HERODOTUS_TYPE_UTF8 */
224 ((char *)(w->dest))[w->first_unwritable_offset] …
225 }
226 } else if (w->destlen > 0) {
227 /*
228 * In this case, there is no more space in the buffer and
229 * the last unwritable offset is larger than
230 * or equal to the destination buffer length. This means
231 * that we are forced to simply write into the last
232 * byte.
233 */
234 if (w->type == HERODOTUS_TYPE_CODEPOINT) {
235 ((uint_least32_t *)(w->dest))[w->destlen - 1] = …
236 } else { /* w->type == HERODOTUS_TYPE_UTF8 */
237 ((char *)(w->dest))[w->destlen - 1] = '\0';
238 }
239 }
240
241 /* w->off is not incremented in any case */
242 }
243
244 size_t
245 herodotus_writer_number_written(const HERODOTUS_WRITER *w)
246 {
247 return w->off;
248 }
249
/*
 * Append the codepoint cp to the writer.
 *
 * w->off always grows by the size of the codepoint: 1 for a codepoint
 * writer, or the value returned by grapheme_encode_utf8(cp, NULL, 0)
 * for a UTF-8 writer. This happens whether or not anything was
 * stored, including when w->dest is NULL.
 *
 * A codepoint writer stores cp only while w->off is inside the
 * buffer. A UTF-8 writer encodes cp only when w->off + ret is
 * strictly below destlen; otherwise, if no offset has been noted yet,
 * it records w->off in first_unwritable_offset. That also happens
 * when w->dest is NULL.
 *
 * NOTE(review): because the comparison is strict, an encoding that
 * would end exactly at destlen is not written; presumably this keeps
 * room for a terminator -- confirm. The destination argument of the
 * encode call is cut off in this listing.
 */
250 void
251 herodotus_write_codepoint(HERODOTUS_WRITER *w, uint_least32_t cp)
252 {
253 size_t ret;
254
255 /*
256 * This function will always faithfully say how many codepoints
257 * were written, even if the buffer ends. This is used to enable
258 * truncation detection.
259 */
260 if (w->type == HERODOTUS_TYPE_CODEPOINT) {
261 if (w->dest != NULL && w->off < w->destlen) {
262 ((uint_least32_t *)(w->dest))[w->off] = cp;
263 }
264
265 w->off += 1;
266 } else { /* w->type == HERODOTUS_TYPE_UTF8 */
267 /*
268 * First determine how many bytes we need to encode the
269 * codepoint
270 */
271 ret = grapheme_encode_utf8(cp, NULL, 0);
272
273 if (w->dest != NULL && w->off + ret < w->destlen) {
274 /* we still have enough room in the buffer */
275 grapheme_encode_utf8(cp, (char *)(w->dest) + w->…
276 w->destlen - w->off);
277 } else if (w->first_unwritable_offset == SIZE_MAX) {
278 /*
279 * the first unwritable offset has not been
280 * noted down, so this is the first time we can't
281 * write (completely) to an offset
282 */
283 w->first_unwritable_offset = w->off;
284 }
285
286 w->off += ret;
287 }
288 }
289
/*
 * Initialise the property state p for scanning the input described by
 * the reader r.
 *
 * The state pointer, the no-property value and the three callbacks
 * are stored in p. The reader r itself is only copied (three times:
 * into the mid, raw and skip readers) and never advanced. Both
 * previous-property pairs start out as no_prop. The raw
 * next-properties are filled from up to two codepoints read with the
 * raw reader; the skip next-properties are filled from the skip
 * reader, storing only properties for which is_skippable_prop()
 * returns false. Slots that could not be filled stay at no_prop.
 *
 * NOTE(review): the third parameter (presumably no_prop, as assigned
 * in the body) and the last argument of both read calls are cut off
 * in this listing -- confirm against the original file.
 */
290 void
291 proper_init(const HERODOTUS_READER *r, void *state, uint_least8_t no_pro…
292 uint_least8_t (*get_break_prop)(uint_least32_t),
293 bool (*is_skippable_prop)(uint_least8_t),
294 void (*skip_shift_callback)(uint_least8_t, void *),
295 struct proper *p)
296 {
297 uint_least8_t prop;
298 uint_least32_t cp;
299 size_t i;
300
301 /* set internal variables */
302 p->state = state;
303 p->no_prop = no_prop;
304 p->get_break_prop = get_break_prop;
305 p->is_skippable_prop = is_skippable_prop;
306 p->skip_shift_callback = skip_shift_callback;
307
308 /*
309 * Initialize mid-reader, which is basically just there
310 * to reflect the current position of the viewing-line
311 */
312 herodotus_reader_copy(r, &(p->mid_reader));
313
314 /*
315 * In the initialization, we simply (try to) fill in next_prop.
316 * If we cannot read in more (due to the buffer ending), we
317 * fill in the prop as invalid
318 */
319
320 /*
321 * initialize the previous properties to have no property
322 * (given we are at the start of the buffer)
323 */
324 p->raw.prev_prop[1] = p->raw.prev_prop[0] = p->no_prop;
325 p->skip.prev_prop[1] = p->skip.prev_prop[0] = p->no_prop;
326
327 /*
328 * initialize the next properties
329 */
330
331 /* initialize the raw reader */
332 herodotus_reader_copy(r, &(p->raw_reader));
333
334 /* fill in the two next raw properties (after no-initialization)…
335 p->raw.next_prop[0] = p->raw.next_prop[1] = p->no_prop;
336 for (i = 0;
337 i < 2 && herodotus_read_codepoint(&(p->raw_reader), true, &…
338 HERODOTUS_STATUS_SUCCESS;) {
339 p->raw.next_prop[i++] = p->get_break_prop(cp);
340 }
341
342 /* initialize the skip reader */
343 herodotus_reader_copy(r, &(p->skip_reader));
344
345 /* fill in the two next skip properties (after no-initialization…
346 p->skip.next_prop[0] = p->skip.next_prop[1] = p->no_prop;
347 for (i = 0;
348 i < 2 && herodotus_read_codepoint(&(p->skip_reader), true, …
349 HERODOTUS_STATUS_SUCCESS;) {
350 prop = p->get_break_prop(cp);
351 if (!p->is_skippable_prop(prop)) {
352 p->skip.next_prop[i++] = prop;
353 }
354 }
355 }
356
/*
 * Move the viewing-line of the property state p forward by one
 * codepoint.
 *
 * Returns 1, without shifting anything, when the property about to
 * cross the raw viewing-line (p->raw.next_prop[0]) is no_prop.
 * Returns 0 after a successful shift.
 *
 * On a shift the raw property window moves by one and the mid reader
 * is advanced by one codepoint. If the property that just crossed the
 * raw viewing-line is not skippable, the skip window is shifted as
 * well, skip_shift_callback() is invoked with the property that
 * crossed the skip viewing-line, and the skip reader is read until a
 * non-skippable property is found (or it stops yielding codepoints,
 * leaving no_prop in the slot).
 *
 * Note that the raw reader is read before the end check, so it may be
 * advanced even on a call that returns 1.
 */
357 int
358 proper_advance(struct proper *p)
359 {
360 uint_least8_t prop;
361 uint_least32_t cp;
362
363 /* read in next "raw" property */
364 if (herodotus_read_codepoint(&(p->raw_reader), true, &cp) ==
365 HERODOTUS_STATUS_SUCCESS) {
366 prop = p->get_break_prop(cp);
367 } else {
368 prop = p->no_prop;
369 }
370
371 /*
372 * do a shift-in, unless we find that the property that is to
373 * be moved past the "raw-viewing-line" (this property is stored
374 * in p->raw.next_prop[0]) is a no_prop, indicating that
375 * we are at the end of the buffer.
376 */
377 if (p->raw.next_prop[0] == p->no_prop) {
378 return 1;
379 }
380
381 /* shift in the properties */
382 p->raw.prev_prop[1] = p->raw.prev_prop[0];
383 p->raw.prev_prop[0] = p->raw.next_prop[0];
384 p->raw.next_prop[0] = p->raw.next_prop[1];
385 p->raw.next_prop[1] = prop;
386
387 /* advance the middle reader viewing-line */
388 (void)herodotus_read_codepoint(&(p->mid_reader), true, &cp);
389
390 /* check skippability-property */
391 if (!p->is_skippable_prop(p->raw.prev_prop[0])) {
392 /*
393 * the property that has moved past the "raw-viewing-line"
394 * (this property is now (after the raw-shift) stored in
395 * p->raw.prev_prop[0] and guaranteed not to be a no-prop,
396 * guaranteeing that we won't shift a no-prop past the
397 * "viewing-line" in the skip-properties) is not a skippable
398 * property, thus we need to shift the skip property as well
399 */
400 p->skip.prev_prop[1] = p->skip.prev_prop[0];
401 p->skip.prev_prop[0] = p->skip.next_prop[0];
402 p->skip.next_prop[0] = p->skip.next_prop[1];
403
404 /*
405 * call the skip-shift-callback on the property that
406 * passed the skip-viewing-line (this property is now
407 * stored in p->skip.prev_prop[0]).
408 */
409 p->skip_shift_callback(p->skip.prev_prop[0], p->state);
410
411 /* determine the next shift property */
412 p->skip.next_prop[1] = p->no_prop;
413 while (herodotus_read_codepoint(&(p->skip_reader), true,…
414 HERODOTUS_STATUS_SUCCESS) {
415 prop = p->get_break_prop(cp);
416 if (!p->is_skippable_prop(prop)) {
417 p->skip.next_prop[1] = prop;
418 break;
419 }
420 }
421 }
422
423 return 0;
424 }
You are viewing proxied material from suckless.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.