Refactor src/bidirectional.c with Herodotus - libgrapheme - unicode string library | |
git clone git://git.suckless.org/libgrapheme | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit dd15fea026c3e0b389381ae8cc08e0f39fa1a8f7 | |
parent efb2f452b6d1327ba091ac8a69556a060401afed | |
Author: Laslo Hunhold <[email protected]> | |
Date: Fri, 14 Oct 2022 00:40:37 +0200 | |
Refactor src/bidirectional.c with Herodotus | |
This simplifies a lot of the code and makes it more consistent as it now | |
uses patterns that are similar to those in src/case.c. | |
The most significant effect is, of course, the set of guarantees that comes with | |
using this interface. | |
Signed-off-by: Laslo Hunhold <[email protected]> | |
Diffstat: | |
M src/bidirectional.c | 115 +++++++++++++++--------------… | |
1 file changed, 54 insertions(+), 61 deletions(-) | |
--- | |
diff --git a/src/bidirectional.c b/src/bidirectional.c | |
@@ -24,22 +24,26 @@ get_bidi_property(uint_least32_t cp) | |
* https://unicode.org/reports/tr9/ | |
* https://github.com/omid/Persian-Log2Vis/blob/master/bidi.php | |
* https://github.com/fribidi/fribidi/blob/master/lib/fribidi.h | |
+ * | |
+ * Apply transformation separately | |
+ * src, dest=1110001110000111 -> get contiguous blocks and apply | |
+ * investigate fribidi and refactor API | |
*/ | |
#define MAX_DEPTH 125 | |
-#include <stdio.h> /* --------------------------------------------------------… | |
-static size_t | |
-determine_paragraph_level(const void *src, size_t srclen, | |
- size_t (*get_codepoint)(const void *, size_t, size_t… | |
- size_t (*set_codepoint)(uint_least32_t, void *, size… | |
+static uint8_t | |
+determine_paragraph_level(const HERODOTUS_READER *r) | |
{ | |
+ HERODOTUS_READER tmp; | |
enum bidi_property prop; | |
- size_t srcoff, isolate_level; | |
+ uint8_t isolate_level; | |
uint_least32_t cp; | |
- for (srcoff = 0, isolate_level = 0; srcoff < srclen; ) { | |
- srcoff += get_codepoint(src, srclen, srcoff, &cp); | |
+ herodotus_reader_copy(r, &tmp); | |
+ | |
+ for (isolate_level = 0; herodotus_read_codepoint(&tmp, true, &cp) == | |
+ HERODOTUS_STATUS_SUCCESS; ) { | |
prop = get_bidi_property(cp); | |
/* BD8/BD9 */ | |
@@ -70,27 +74,21 @@ determine_paragraph_level(const void *src, size_t srclen, | |
return 0; | |
} | |
-static size_t | |
-handle_paragraph(const void *src, size_t srclen, enum grapheme_bidirectional_o… | |
- size_t (*get_codepoint)(const void *, size_t, size_t, uint_le… | |
- size_t (*set_codepoint)(uint_least32_t, void *, size_t, size_… | |
- void *dest, size_t destlen) | |
+static void | |
+handle_paragraph(HERODOTUS_READER *r, enum grapheme_bidirectional_override ove… | |
+ HERODOTUS_WRITER *w) | |
{ | |
enum bidi_property prop; | |
- size_t srcoff, destoff, paragraph_level; | |
+ uint8_t paragraph_level; | |
-fprintf(stderr, "paragraph-call: par='%.*s'\n", (int)srclen, (const char *)src… | |
/* determine paragraph level (rules P1-P3, HL1) */ | |
if (override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { | |
paragraph_level = 0; | |
} else if (override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) { | |
paragraph_level = 1; | |
} else { /* GRAPHEME_BIDIRECTIONAL_OVERRIDE_NONE and invalid */ | |
- paragraph_level = determine_paragraph_level(src, srclen, | |
- get_codepoint, | |
- set_codepoint); | |
+ paragraph_level = determine_paragraph_level(r); | |
} | |
-fprintf(stderr, "\tparagraph_level=%zu\n", paragraph_level); | |
/* determine_explicit_levels(...); X1-X8 */ | |
/* prepare_implicit_processing(); X9-X10, BD13 */ | |
@@ -98,53 +96,39 @@ fprintf(stderr, "\tparagraph_level=%zu\n", paragraph_level); | |
/* resolve_neutral_and_isolate_formatting_types() N0-N2 */ | |
/* resolve_implicit_levels(); I1-I2 */ | |
/* reorder_resolved_levels(); L1-L4 */ | |
- | |
- return destoff; | |
} | |
static size_t | |
-logical_to_visual(const void *src, size_t srclen, enum grapheme_bidirectional_… | |
- size_t (*get_codepoint)(const void *, size_t, size_t, uint_l… | |
- size_t (*set_codepoint)(uint_least32_t, void *, size_t, size… | |
- void *dest, size_t destlen) | |
+next_paragraph_break(const HERODOTUS_READER *r) | |
{ | |
- size_t srcoff, destoff, lastparoff; | |
+ HERODOTUS_READER tmp; | |
uint_least32_t cp; | |
- for (srcoff = destoff = lastparoff = 0; srcoff < srclen; ) { | |
- srcoff += get_codepoint(src, srclen, srcoff, &cp); | |
+ herodotus_reader_copy(r, &tmp); | |
- /* P1 */ | |
- if (get_bidi_property(cp) == BIDI_PROP_B || | |
- srcoff == srclen || | |
- (get_codepoint == get_codepoint_utf8 && | |
- srclen == SIZE_MAX && cp == 0)) { | |
- /* | |
- * we encountered a paragraph separator or | |
- * reached the end of the text. | |
- * Call the paragraph handling function on | |
- * the paragraph including the separator. | |
- */ | |
- if (get_codepoint == get_codepoint_utf8) { | |
- destoff += handle_paragraph((const char *)src … | |
- srcoff - lastparof… | |
- get_codepoint, set… | |
- (char *)dest + des… | |
- (destoff < destlen… | |
- (destlen - destoff… | |
- } else { | |
- destoff += handle_paragraph((const uint_least3… | |
- srcoff - lastparof… | |
- get_codepoint, set… | |
- (uint_least32_t *)… | |
- (destoff < destlen… | |
- (destlen - destoff… | |
- } | |
- lastparoff = srcoff; | |
+ for ( ; herodotus_read_codepoint(&tmp, true, &cp) == HERODOTUS_STATUS_… | |
+ if (get_bidi_property(cp) == BIDI_PROP_B) { | |
+ break; | |
} | |
} | |
- return destoff; | |
+ return herodotus_reader_number_read(&tmp); | |
+} | |
+ | |
+static size_t | |
+logical_to_visual(HERODOTUS_READER *r, enum grapheme_bidirectional_override ov… | |
+ HERODOTUS_WRITER *w) | |
+{ | |
+ size_t npb; | |
+ | |
+ for (; (npb = next_paragraph_break(r)) > 0;) { | |
+ /* P1 */ | |
+ herodotus_reader_push_advance_limit(r, npb); | |
+ handle_paragraph(r, override, w); | |
+ herodotus_reader_pop_limit(r); | |
+ } | |
+ | |
+ return herodotus_writer_number_written(w); | |
} | |
size_t | |
@@ -154,8 +138,13 @@ grapheme_bidirectional_logical_to_visual(const uint_least3… | |
uint_least32_t *dest, | |
size_t destlen) | |
{ | |
- return logical_to_visual(src, srclen, override, | |
- get_codepoint, set_codepoint, dest, destlen); | |
+ HERODOTUS_READER r; | |
+ HERODOTUS_WRITER w; | |
+ | |
+ herodotus_reader_init(&r, HERODOTUS_TYPE_CODEPOINT, src, srclen); | |
+ herodotus_writer_init(&w, HERODOTUS_TYPE_CODEPOINT, dest, destlen); | |
+ | |
+ return logical_to_visual(&r, override, &w); | |
} | |
size_t | |
@@ -163,7 +152,11 @@ grapheme_bidirectional_logical_to_visual_utf8(const char *… | |
enum grapheme_bidirectional_over… | |
char *dest, size_t destlen) | |
{ | |
- return logical_to_visual(src, srclen, override, | |
- get_codepoint_utf8, set_codepoint_utf8, | |
- dest, destlen); | |
+ HERODOTUS_READER r; | |
+ HERODOTUS_WRITER w; | |
+ | |
+ herodotus_reader_init(&r, HERODOTUS_TYPE_UTF8, src, srclen); | |
+ herodotus_writer_init(&w, HERODOTUS_TYPE_UTF8, dest, destlen); | |
+ | |
+ return logical_to_visual(&r, override, &w); | |
} |