Refactor bidirectional state handling - libgrapheme - unicode string library | |
git clone git://git.suckless.org/libgrapheme | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit aafe6c300e59ed1b4407c71917fb2034fdc7798a | |
parent fd2d1969084185ff5e638c28066d0d35d510b7f0 | |
Author: Laslo Hunhold <[email protected]> | |
Date: Sun, 20 Nov 2022 23:37:17 +0100 | |
Refactor bidirectional state handling | |
The best approach is to have only one place where state is kept and | |
no risk of "stale" state disturbing program execution. | |
Hand-managing state in the isolate-runner was thus problematic, as there | |
was the real risk of sliding into stale state. Even though this is | |
manageable, it makes the code relatively fragile and hard to debug. | |
Additionally, the serialization was a mess and was in dire need of | |
more structure. The state currently still contains a "raw property", | |
but this will be removed once the API has been properly split between | |
the preprocessing and line-processing steps. The variant of the state | |
lookup table without the raw property is already included, but for now | |
it is kept within an #if 0-guard. | |
Signed-off-by: Laslo Hunhold <[email protected]> | |
Diffstat: | |
M src/bidirectional.c | 808 +++++++++++++++++------------… | |
1 file changed, 434 insertions(+), 374 deletions(-) | |
--- | |
diff --git a/src/bidirectional.c b/src/bidirectional.c | |
@@ -8,149 +8,231 @@ | |
#define MAX_DEPTH 125 | |
+#if 0 | |
+enum state_type { | |
+ STATE_PROP, /* in 0..23, bidi_property */ | |
+ STATE_BRACKET_OFF, /* in 0..255, offset in bidi_bracket */ | |
+ STATE_LEVEL, /* in 0..MAX_DEPTH+1=126, embedding level */ | |
+ STATE_PARAGRAPH_LEVEL, /* in 0..1, paragraph embedding level */ | |
+ STATE_VISITED, /* in 0..1, visited within isolating run */ | |
+}; | |
+ | |
+/* without rawprop, as it should be */ | |
+static struct { | |
+ int_least32_t filter_mask; | |
+ int_least32_t clear_mask; | |
+ size_t mask_shift; | |
+ int_least16_t value_offset; | |
+} state_lut[] = { | |
+ [STATE_PROP] = { | |
+ .filter_mask = 0x00001F, /* 00000000 00000000 00011111 */ | |
+ .clear_mask = 0x3FFFE0, /* 00111111 11111111 11100000 */ | |
+ .mask_shift = 0, | |
+ .value_offset = 0, | |
+ }, | |
+ [STATE_BRACKET_OFF] = { | |
+ .filter_mask = 0x001FE0, /* 00000000 00011111 11100000 */ | |
+ .clear_mask = 0x3FE01F, /* 00111111 11100000 00011111 */ | |
+ .mask_shift = 5, | |
+ .value_offset = 0, | |
+ }, | |
+ [STATE_LEVEL] = { | |
+ .filter_mask = 0x0FE000, /* 00001111 11100000 00000000 */ | |
+ .clear_mask = 0x301FFF, /* 00110000 00011111 11111111 */ | |
+ .mask_shift = 13, | |
+ .value_offset = -1, | |
+ }, | |
+ [STATE_PARAGRAPH_LEVEL] = { | |
+ .filter_mask = 0x100000, /* 00010000 00000000 00000000 */ | |
+ .clear_mask = 0x2FFFFF, /* 00101111 11111111 11111111 */ | |
+ .mask_shift = 20, | |
+ .value_offset = 0, | |
+ }, | |
+ [STATE_VISITED] = { | |
+ .filter_mask = 0x200000, /* 00100000 00000000 00000000 */ | |
+ .clear_mask = 0x1FFFFF, /* 00011111 11111111 11111111 */ | |
+ .mask_shift = 21, | |
+ .value_offset = 0, | |
+ }, | |
+}; | |
+#endif | |
+ | |
+enum state_type { | |
+ STATE_PROP, /* in 0..23, bidi_property */ | |
+ STATE_BRACKET_OFF, /* in 0..255, offset in bidi_bracket */ | |
+ STATE_LEVEL, /* in 0..MAX_DEPTH+1=126, embedding level */ | |
+ STATE_PARAGRAPH_LEVEL, /* in 0..1, paragraph embedding level */ | |
+ STATE_VISITED, /* in 0..1, visited within isolating run */ | |
+ STATE_RAWPROP, | |
+}; | |
+ | |
+static struct { | |
+ int_least32_t filter_mask; | |
+ int_least32_t clear_mask; | |
+ size_t mask_shift; | |
+ int_least16_t value_offset; | |
+} state_lut[] = { | |
+ [STATE_PROP] = { | |
+ .filter_mask = 0x000001F, /* 00000000 00000000 00000000 00011… | |
+ .clear_mask = 0x7FFFFE0, /* 00000111 11111111 11111111 11100… | |
+ .mask_shift = 0, | |
+ .value_offset = 0, | |
+ }, | |
+ [STATE_BRACKET_OFF] = { | |
+ .filter_mask = 0x0001FE0, /* 00000000 00000000 00011111 11100… | |
+ .clear_mask = 0x7FFE01F, /* 00000111 11111111 11100000 00011… | |
+ .mask_shift = 5, | |
+ .value_offset = 0, | |
+ }, | |
+ [STATE_LEVEL] = { | |
+ .filter_mask = 0x00FE000, /* 00000000 00001111 11100000 00000… | |
+ .clear_mask = 0x7F01FFF, /* 00000111 11110000 00011111 11111… | |
+ .mask_shift = 13, | |
+ .value_offset = -1, | |
+ }, | |
+ [STATE_PARAGRAPH_LEVEL] = { | |
+ .filter_mask = 0x0100000, /* 00000000 00010000 00000000 00000… | |
+ .clear_mask = 0x7EFFFFF, /* 00000111 11101111 11111111 11111… | |
+ .mask_shift = 20, | |
+ .value_offset = 0, | |
+ }, | |
+ [STATE_VISITED] = { | |
+ .filter_mask = 0x0200000, /* 00000000 00100000 00000000 00000… | |
+ .clear_mask = 0x7DFFFFF, /* 00000111 11011111 11111111 11111… | |
+ .mask_shift = 21, | |
+ .value_offset = 0, | |
+ }, | |
+ [STATE_RAWPROP] = { | |
+ .filter_mask = 0x7C00000, /* 00000111 11000000 00000000 00000… | |
+ .clear_mask = 0x03FFFFF, /* 00000000 00111111 11111111 11111… | |
+ .mask_shift = 22, | |
+ .value_offset = 0, | |
+ }, | |
+}; | |
+ | |
+static inline int_least16_t | |
+get_state(enum state_type t, int_least32_t input) | |
+{ | |
+ return (int_least16_t)(((input & state_lut[t].filter_mask) >> | |
+ state_lut[t].mask_shift) + | |
+ state_lut[t].value_offset); | |
+} | |
+ | |
+static inline void | |
+set_state(enum state_type t, int_least16_t value, int_least32_t *output) | |
+{ | |
+ *output &= state_lut[t].clear_mask; | |
+ *output |= ((value - state_lut[t].value_offset) | |
+ << state_lut[t].mask_shift) & | |
+ state_lut[t].filter_mask; | |
+} | |
+ | |
struct isolate_runner { | |
int_least32_t *buf; | |
size_t buflen; | |
struct { | |
- enum bidi_property prop; | |
- } prev; | |
- | |
- struct { | |
size_t off; | |
- enum bidi_property prop; | |
- int_least8_t level; | |
- } cur; | |
+ } prev, cur, next; | |
- struct { | |
- size_t off; | |
- enum bidi_property prop; | |
- } next; | |
+ enum bidi_property sos, eos; | |
uint_least8_t paragraph_level; | |
int_least8_t isolating_run_level; | |
enum bidi_property last_strong_type; | |
}; | |
-/* | |
- * we want to store the bidirectional property, the embedding level | |
- * and the visited-state (for sequential processing of rule X10) | |
- * all in a signed 16-bit-integer (because that's what we will write | |
- * the final embedding level into). To remain outside of implementation | |
- * defined territory, we can only effectively use 15 bits for bitwise | |
- * magic. | |
- * | |
- * Storage is still feasible, though, because the values all have very | |
- * limited ranges and can be stored as unsigned integers: | |
- * | |
- * paragraph_level is in 0..1 which lies in 0..1 = 2^1-1 | |
- * level+1 is in 0..MAX_DEPTH+1=126 which lies in 0..127 = 2^7-1 | |
- * prop is in 0..23 which lies in 0..31 = 2^5-1 | |
- * bracket_off is in 0..255 which lies in 0..255 = 2^8-1 | |
- * visited is in 0..1 which lies in 0..1 = 2^1-1 | |
- * | |
- * yielding a total storage size of 22 bits. | |
- */ | |
-struct state { | |
- uint_least8_t paragraph_level; | |
- int_least8_t level; | |
- enum bidi_property prop; | |
- const struct bracket *bracket; | |
- bool visited; | |
- enum bidi_property rawprop; | |
-}; | |
+static inline enum bidi_property | |
+ir_get_previous_prop(const struct isolate_runner *ir) | |
+{ | |
+ return (ir->prev.off == SIZE_MAX) ? | |
+ ir->sos : | |
+ (uint_least8_t)get_state(STATE_PROP, | |
+ ir->buf[ir->prev.off]); | |
+} | |
-static inline void | |
-state_serialize(const struct state *s, int_least32_t *out) | |
+static inline enum bidi_property | |
+ir_get_current_prop(const struct isolate_runner *ir) | |
{ | |
- *out = (int_least32_t)(((((uint_least32_t)(s->paragraph_level)) & | |
- 0x01 /* 00000001 */) | |
- << 0) | | |
- ((((uint_least32_t)(s->level + 1)) & | |
- 0x7F /* 01111111 */) | |
- << 1) | | |
- ((((uint_least32_t)(s->prop)) & | |
- 0x1F /* 00011111 */) | |
- << 8) | | |
- ((((uint_least32_t)(s->bracket - bidi_bracket))… | |
- 0xFF /* 11111111 */) | |
- << 13) | | |
- ((((uint_least32_t)(s->visited)) & | |
- 0x01 /* 00000001 */) | |
- << 21) | | |
- ((((uint_least32_t)(s->rawprop)) & | |
- 0x1F /* 00011111 */) | |
- << 22)); | |
+ return (uint_least8_t)get_state(STATE_PROP, ir->buf[ir->cur.off]); | |
} | |
-static inline void | |
-state_deserialize(int_least32_t in, struct state *s) | |
+static inline enum bidi_property | |
+ir_get_next_prop(const struct isolate_runner *ir) | |
+{ | |
+ return (ir->next.off == SIZE_MAX) ? | |
+ ir->eos : | |
+ (uint_least8_t)get_state(STATE_PROP, | |
+ ir->buf[ir->next.off]); | |
+} | |
+ | |
+static inline int_least8_t | |
+ir_get_current_level(const struct isolate_runner *ir) | |
{ | |
- s->paragraph_level = (uint_least8_t)((((uint_least32_t)in) >> 0) & | |
- 0x01 /* 00000001 */); | |
- s->level = (int_least8_t)((((uint_least32_t)in) >> 1) & | |
- 0x7F /* 01111111 */) - | |
- 1; | |
- s->prop = (enum bidi_property)((((uint_least32_t)in) >> 8) & | |
- 0x1F /* 00011111 */); | |
- s->bracket = | |
- bidi_bracket + (uint_least8_t)((((uint_least32_t)in) >> 13) & | |
- 0xFF /* 11111111 */); | |
- s->visited = (bool)((((uint_least32_t)in) >> 21) & 0x01 /* 00000001 */… | |
- s->rawprop = (enum bidi_property)((((uint_least32_t)in) >> 22) & | |
- 0x1F /* 00011111 */); | |
+ return (int_least8_t)get_state(STATE_LEVEL, ir->buf[ir->cur.off]); | |
} | |
static void | |
-isolate_runner_init(int_least32_t *buf, size_t buflen, size_t off, | |
- uint_least8_t paragraph_level, bool within, | |
- struct isolate_runner *ir) | |
+ir_set_current_prop(struct isolate_runner *ir, enum bidi_property prop) | |
{ | |
- struct state s; | |
- size_t i; | |
- int_least8_t cur_level, sos_level; | |
+ set_state(STATE_PROP, (int_least16_t)prop, &(ir->buf[ir->cur.off])); | |
+} | |
- state_deserialize(buf[off], &s); | |
+static void | |
+ir_init(int_least32_t *buf, size_t buflen, size_t off, | |
+ uint_least8_t paragraph_level, bool within, struct isolate_runner *ir) | |
+{ | |
+ size_t i; | |
+ int_least8_t sos_level; | |
/* initialize invariants */ | |
ir->buf = buf; | |
ir->buflen = buflen; | |
ir->paragraph_level = paragraph_level; | |
- ir->isolating_run_level = s.level; | |
/* advance off until we are at a non-removed character */ | |
- while (s.level == -1) { | |
- off++; | |
- state_deserialize(buf[off], &s); | |
+ for (; off < buflen; off++) { | |
+ if (get_state(STATE_LEVEL, buf[off]) != -1) { | |
+ break; | |
+ } | |
+ } | |
+ if (off == buflen) { | |
+ /* we encountered no more non-removed character, terminate */ | |
+ ir->next.off = SIZE_MAX; | |
+ return; | |
} | |
+ /* set the isolating run level to that of the current offset */ | |
+ ir->isolating_run_level = | |
+ (int_least8_t)get_state(STATE_LEVEL, buf[off]); | |
+ | |
+ /* initialize sos and eos to dummy values */ | |
+ ir->sos = ir->eos = NUM_BIDI_PROPS; | |
+ | |
/* | |
- * we store the current offset in the next offset, so it is | |
- * shifted in properly at the first advancement | |
+ * we write the information of the "current" state into next, | |
+ * so that the shift-in at the first advancement moves it in | |
+ * cur, as desired. | |
*/ | |
ir->next.off = off; | |
- ir->next.prop = s.prop; | |
/* | |
- * determine the previous state but store it in cur.prop | |
- * cur.off is set to SIZE_MAX and cur.level to -1, as both are | |
- * discarded on the first advancement anyway | |
+ * determine the previous state but store its offset in cur.off, | |
+ * given it's shifted in on the first advancement | |
*/ | |
- cur_level = s.level; | |
- ir->cur.prop = NUM_BIDI_PROPS; | |
+ ir->cur.off = SIZE_MAX; | |
for (i = off, sos_level = -1; i >= 1; i--) { | |
- state_deserialize(buf[i - 1], &s); | |
- | |
- if (s.level != -1) { | |
+ if (get_state(STATE_LEVEL, buf[i - 1]) != -1) { | |
/* | |
* we found a character that has not been | |
* removed in X9 | |
*/ | |
- sos_level = s.level; | |
+ sos_level = (int_least8_t)get_state(STATE_LEVEL, | |
+ buf[i - 1]); | |
if (within) { | |
/* we just take it */ | |
- ir->cur.prop = s.prop; | |
+ ir->cur.off = i; | |
} | |
break; | |
@@ -158,36 +240,33 @@ isolate_runner_init(int_least32_t *buf, size_t buflen, si… | |
} | |
if (sos_level == -1) { | |
/* | |
- * there were no preceding characters, set sos-level | |
- * to paragraph embedding level | |
+ * there were no preceding non-removed characters, set | |
+ * sos-level to paragraph embedding level | |
*/ | |
sos_level = (int_least8_t)paragraph_level; | |
} | |
- if (!within || ir->cur.prop == NUM_BIDI_PROPS) { | |
+ if (!within || ir->cur.off == SIZE_MAX) { | |
/* | |
* we are at the beginning of the sequence; initialize | |
* it faithfully according to the algorithm by looking | |
* at the sos-level | |
*/ | |
- if (MAX(sos_level, cur_level) % 2 == 0) { | |
+ if (MAX(sos_level, ir->isolating_run_level) % 2 == 0) { | |
/* the higher level is even, set sos to L */ | |
- ir->cur.prop = BIDI_PROP_L; | |
+ ir->sos = BIDI_PROP_L; | |
} else { | |
/* the higher level is odd, set sos to R */ | |
- ir->cur.prop = BIDI_PROP_R; | |
+ ir->sos = BIDI_PROP_R; | |
} | |
} | |
- | |
- ir->cur.off = SIZE_MAX; | |
- ir->cur.level = -1; | |
} | |
static int | |
-isolate_runner_advance(struct isolate_runner *ir) | |
+ir_advance(struct isolate_runner *ir) | |
{ | |
- struct state s; | |
- int_least8_t isolate_level, last_isolate_level; | |
+ enum bidi_property prop; | |
+ int_least8_t level, isolate_level, last_isolate_level; | |
size_t i; | |
if (ir->next.off == SIZE_MAX) { | |
@@ -196,45 +275,43 @@ isolate_runner_advance(struct isolate_runner *ir) | |
} | |
/* shift in */ | |
- ir->prev.prop = ir->cur.prop; | |
+ ir->prev.off = ir->cur.off; | |
ir->cur.off = ir->next.off; | |
- state_deserialize(ir->buf[ir->cur.off], &s); | |
- ir->cur.prop = ir->next.prop; | |
- ir->cur.level = s.level; | |
/* mark as visited */ | |
- s.visited = true; | |
- state_serialize(&s, &(ir->buf[ir->cur.off])); | |
+ set_state(STATE_VISITED, 1, &(ir->buf[ir->cur.off])); | |
/* | |
* update last strong type, which is guaranteed to work properly | |
- * on the first advancement as the prev.prop holds the sos type, | |
- * which can only be either R or L, which are both strong types | |
+ * on the first advancement as the prev.off is SIZE_MAX and the | |
+ * implied sos type can only be either R or L, which are both | |
+ * strong types | |
*/ | |
- if (ir->prev.prop == BIDI_PROP_R || ir->prev.prop == BIDI_PROP_L || | |
- ir->prev.prop == BIDI_PROP_AL) { | |
- ir->last_strong_type = ir->prev.prop; | |
+ if (ir_get_previous_prop(ir) == BIDI_PROP_R || | |
+ ir_get_previous_prop(ir) == BIDI_PROP_L || | |
+ ir_get_previous_prop(ir) == BIDI_PROP_AL) { | |
+ ir->last_strong_type = ir_get_previous_prop(ir); | |
} | |
/* initialize next state by going to the next character in the sequence | |
*/ | |
ir->next.off = SIZE_MAX; | |
- ir->next.prop = NUM_BIDI_PROPS; | |
last_isolate_level = -1; | |
for (i = ir->cur.off, isolate_level = 0; i < ir->buflen; i++) { | |
- state_deserialize(ir->buf[i], &s); | |
+ level = (int_least8_t)get_state(STATE_LEVEL, ir->buf[i]); | |
+ prop = (uint_least8_t)get_state(STATE_PROP, ir->buf[i]); | |
- if (s.level == -1) { | |
+ if (level == -1) { | |
/* this is one of the ignored characters, skip */ | |
continue; | |
- } else if (s.level == ir->isolating_run_level) { | |
- last_isolate_level = s.level; | |
+ } else if (level == ir->isolating_run_level) { | |
+ last_isolate_level = level; | |
} | |
/* follow BD8/BD9 and P2 to traverse the current sequence */ | |
- if (s.prop == BIDI_PROP_LRI || s.prop == BIDI_PROP_RLI || | |
- s.prop == BIDI_PROP_FSI) { | |
+ if (prop == BIDI_PROP_LRI || prop == BIDI_PROP_RLI || | |
+ prop == BIDI_PROP_FSI) { | |
/* | |
* we encountered an isolate initiator, increment | |
* counter, but go into processing when we | |
@@ -246,7 +323,7 @@ isolate_runner_advance(struct isolate_runner *ir) | |
if (isolate_level != 1) { | |
continue; | |
} | |
- } else if (s.prop == BIDI_PROP_PDI && isolate_level > 0) { | |
+ } else if (prop == BIDI_PROP_PDI && isolate_level > 0) { | |
isolate_level--; | |
/* | |
@@ -270,21 +347,20 @@ isolate_runner_advance(struct isolate_runner *ir) | |
if (i == ir->cur.off) { | |
/* we were in the first initializing round */ | |
continue; | |
- } else if (s.level == ir->isolating_run_level) { | |
+ } else if (level == ir->isolating_run_level) { | |
/* isolate_level-skips have been handled before, we're | |
* good */ | |
/* still in the sequence */ | |
- ir->next.off = (size_t)i; | |
- ir->next.prop = s.prop; | |
+ ir->next.off = i; | |
} else { | |
/* out of sequence or isolated, compare levels via eos | |
*/ | |
- if (MAX(last_isolate_level, s.level) % 2 == 0) { | |
- ir->next.prop = BIDI_PROP_L; | |
+ ir->next.off = SIZE_MAX; | |
+ if (MAX(last_isolate_level, level) % 2 == 0) { | |
+ ir->eos = BIDI_PROP_L; | |
} else { | |
- ir->next.prop = BIDI_PROP_R; | |
+ ir->eos = BIDI_PROP_R; | |
} | |
- ir->next.off = SIZE_MAX; | |
} | |
break; | |
} | |
@@ -295,147 +371,107 @@ isolate_runner_advance(struct isolate_runner *ir) | |
* level of the last element in the isolating run sequence | |
* with the paragraph level. | |
*/ | |
+ ir->next.off = SIZE_MAX; | |
if (MAX(last_isolate_level, ir->paragraph_level) % 2 == 0) { | |
/* the higher level is even, set eos to L */ | |
- ir->next.prop = BIDI_PROP_L; | |
+ ir->eos = BIDI_PROP_L; | |
} else { | |
- /* the higher level is odd, set sos to R */ | |
- ir->next.prop = BIDI_PROP_R; | |
+ /* the higher level is odd, set eos to R */ | |
+ ir->eos = BIDI_PROP_R; | |
} | |
- ir->next.off = SIZE_MAX; | |
} | |
return 0; | |
} | |
-static void | |
-isolate_runner_set_current_prop(struct isolate_runner *ir, | |
- enum bidi_property prop) | |
-{ | |
- struct state s; | |
- | |
- state_deserialize(ir->buf[ir->cur.off], &s); | |
- s.prop = prop; | |
- state_serialize(&s, &(ir->buf[ir->cur.off])); | |
- | |
- ir->cur.prop = prop; | |
-} | |
- | |
-static inline enum bidi_property | |
-get_bidi_property(uint_least32_t cp) | |
-{ | |
- if (likely(cp <= 0x10FFFF)) { | |
- return (enum bidi_property)( | |
- (bidi_minor[bidi_major[cp >> 8] + (cp & 0xff)]) & | |
- 0x1F /* 00011111 */); | |
- } else { | |
- return BIDI_PROP_L; | |
- } | |
-} | |
- | |
-static inline uint_least8_t | |
-get_bidi_bracket_off(uint_least32_t cp) | |
-{ | |
- if (likely(cp <= 0x10FFFF)) { | |
- return (bidi_minor[bidi_major[cp >> 8] + (cp & 0xff)]) >> 5; | |
- } else { | |
- return 0; | |
- } | |
-} | |
- | |
static size_t | |
process_isolating_run_sequence(int_least32_t *buf, size_t buflen, size_t off, | |
uint_least8_t paragraph_level) | |
{ | |
- enum bidi_property sequence_prop; | |
+ enum bidi_property sequence_prop, prop; | |
struct isolate_runner ir, tmp; | |
size_t runsince, sequence_end; | |
/* W1 */ | |
- isolate_runner_init(buf, buflen, off, paragraph_level, false, &ir); | |
- while (!isolate_runner_advance(&ir)) { | |
- if (ir.cur.prop == BIDI_PROP_NSM) { | |
- if (ir.prev.prop == BIDI_PROP_LRI || | |
- ir.prev.prop == BIDI_PROP_RLI || | |
- ir.prev.prop == BIDI_PROP_FSI || | |
- ir.prev.prop == BIDI_PROP_PDI) { | |
- isolate_runner_set_current_prop(&ir, | |
- BIDI_PROP_ON); | |
+ ir_init(buf, buflen, off, paragraph_level, false, &ir); | |
+ while (!ir_advance(&ir)) { | |
+ if (ir_get_current_prop(&ir) == BIDI_PROP_NSM) { | |
+ prop = ir_get_previous_prop(&ir); | |
+ | |
+ if (prop == BIDI_PROP_LRI || prop == BIDI_PROP_RLI || | |
+ prop == BIDI_PROP_FSI || prop == BIDI_PROP_PDI) { | |
+ ir_set_current_prop(&ir, BIDI_PROP_ON); | |
} else { | |
- isolate_runner_set_current_prop(&ir, | |
- ir.prev.prop); | |
+ ir_set_current_prop(&ir, prop); | |
} | |
} | |
} | |
/* W2 */ | |
- isolate_runner_init(buf, buflen, off, paragraph_level, false, &ir); | |
- while (!isolate_runner_advance(&ir)) { | |
- if (ir.cur.prop == BIDI_PROP_EN && | |
+ ir_init(buf, buflen, off, paragraph_level, false, &ir); | |
+ while (!ir_advance(&ir)) { | |
+ if (ir_get_current_prop(&ir) == BIDI_PROP_EN && | |
ir.last_strong_type == BIDI_PROP_AL) { | |
- isolate_runner_set_current_prop(&ir, BIDI_PROP_AN); | |
+ ir_set_current_prop(&ir, BIDI_PROP_AN); | |
} | |
} | |
/* W3 */ | |
- isolate_runner_init(buf, buflen, off, paragraph_level, false, &ir); | |
- while (!isolate_runner_advance(&ir)) { | |
- if (ir.cur.prop == BIDI_PROP_AL) { | |
- isolate_runner_set_current_prop(&ir, BIDI_PROP_R); | |
+ ir_init(buf, buflen, off, paragraph_level, false, &ir); | |
+ while (!ir_advance(&ir)) { | |
+ if (ir_get_current_prop(&ir) == BIDI_PROP_AL) { | |
+ ir_set_current_prop(&ir, BIDI_PROP_R); | |
} | |
} | |
/* W4 */ | |
- isolate_runner_init(buf, buflen, off, paragraph_level, false, &ir); | |
- while (!isolate_runner_advance(&ir)) { | |
- if (ir.prev.prop == BIDI_PROP_EN && | |
- (ir.cur.prop == BIDI_PROP_ES || | |
- ir.cur.prop == BIDI_PROP_CS) && | |
- ir.next.prop == BIDI_PROP_EN) { | |
- isolate_runner_set_current_prop(&ir, BIDI_PROP_EN); | |
+ ir_init(buf, buflen, off, paragraph_level, false, &ir); | |
+ while (!ir_advance(&ir)) { | |
+ if (ir_get_previous_prop(&ir) == BIDI_PROP_EN && | |
+ (ir_get_current_prop(&ir) == BIDI_PROP_ES || | |
+ ir_get_current_prop(&ir) == BIDI_PROP_CS) && | |
+ ir_get_next_prop(&ir) == BIDI_PROP_EN) { | |
+ ir_set_current_prop(&ir, BIDI_PROP_EN); | |
} | |
- if (ir.prev.prop == BIDI_PROP_AN && | |
- ir.cur.prop == BIDI_PROP_CS && | |
- ir.next.prop == BIDI_PROP_AN) { | |
- isolate_runner_set_current_prop(&ir, BIDI_PROP_AN); | |
+ if (ir_get_previous_prop(&ir) == BIDI_PROP_AN && | |
+ ir_get_current_prop(&ir) == BIDI_PROP_CS && | |
+ ir_get_next_prop(&ir) == BIDI_PROP_AN) { | |
+ ir_set_current_prop(&ir, BIDI_PROP_AN); | |
} | |
} | |
/* W5 */ | |
runsince = SIZE_MAX; | |
- isolate_runner_init(buf, buflen, off, paragraph_level, false, &ir); | |
- while (!isolate_runner_advance(&ir)) { | |
- if (ir.cur.prop == BIDI_PROP_ET) { | |
+ ir_init(buf, buflen, off, paragraph_level, false, &ir); | |
+ while (!ir_advance(&ir)) { | |
+ if (ir_get_current_prop(&ir) == BIDI_PROP_ET) { | |
if (runsince == SIZE_MAX) { | |
/* a new run has begun */ | |
runsince = ir.cur.off; | |
} | |
- } else if (ir.cur.prop == BIDI_PROP_EN) { | |
+ } else if (ir_get_current_prop(&ir) == BIDI_PROP_EN) { | |
/* set the preceding sequence */ | |
if (runsince != SIZE_MAX) { | |
- isolate_runner_init(buf, buflen, runsince, | |
- paragraph_level, | |
- (runsince > off), &tmp); | |
- while (!isolate_runner_advance(&tmp) && | |
+ ir_init(buf, buflen, runsince, paragraph_level, | |
+ (runsince > off), &tmp); | |
+ while (!ir_advance(&tmp) && | |
tmp.cur.off < ir.cur.off) { | |
- isolate_runner_set_current_prop( | |
- &tmp, BIDI_PROP_EN); | |
+ ir_set_current_prop(&tmp, BIDI_PROP_EN… | |
} | |
runsince = SIZE_MAX; | |
} else { | |
- isolate_runner_init(buf, buflen, ir.cur.off, | |
- paragraph_level, | |
- (ir.cur.off > off), &tmp); | |
- isolate_runner_advance(&tmp); | |
+ ir_init(buf, buflen, ir.cur.off, | |
+ paragraph_level, (ir.cur.off > off), | |
+ &tmp); | |
+ ir_advance(&tmp); | |
} | |
/* follow the succeeding sequence */ | |
- while (!isolate_runner_advance(&tmp)) { | |
- if (tmp.cur.prop != BIDI_PROP_ET) { | |
+ while (!ir_advance(&tmp)) { | |
+ if (ir_get_current_prop(&tmp) != BIDI_PROP_ET)… | |
break; | |
} | |
- isolate_runner_set_current_prop(&tmp, | |
- BIDI_PROP_EN); | |
+ ir_set_current_prop(&tmp, BIDI_PROP_EN); | |
} | |
} else { | |
/* sequence ended */ | |
@@ -444,21 +480,22 @@ process_isolating_run_sequence(int_least32_t *buf, size_t… | |
} | |
/* W6 */ | |
- isolate_runner_init(buf, buflen, off, paragraph_level, false, &ir); | |
- while (!isolate_runner_advance(&ir)) { | |
- if (ir.cur.prop == BIDI_PROP_ES || | |
- ir.cur.prop == BIDI_PROP_ET || | |
- ir.cur.prop == BIDI_PROP_CS) { | |
- isolate_runner_set_current_prop(&ir, BIDI_PROP_ON); | |
+ ir_init(buf, buflen, off, paragraph_level, false, &ir); | |
+ while (!ir_advance(&ir)) { | |
+ prop = ir_get_current_prop(&ir); | |
+ | |
+ if (prop == BIDI_PROP_ES || prop == BIDI_PROP_ET || | |
+ prop == BIDI_PROP_CS) { | |
+ ir_set_current_prop(&ir, BIDI_PROP_ON); | |
} | |
} | |
/* W7 */ | |
- isolate_runner_init(buf, buflen, off, paragraph_level, false, &ir); | |
- while (!isolate_runner_advance(&ir)) { | |
- if (ir.cur.prop == BIDI_PROP_EN && | |
+ ir_init(buf, buflen, off, paragraph_level, false, &ir); | |
+ while (!ir_advance(&ir)) { | |
+ if (ir_get_current_prop(&ir) == BIDI_PROP_EN && | |
ir.last_strong_type == BIDI_PROP_L) { | |
- isolate_runner_set_current_prop(&ir, BIDI_PROP_L); | |
+ ir_set_current_prop(&ir, BIDI_PROP_L); | |
} | |
} | |
@@ -467,33 +504,33 @@ process_isolating_run_sequence(int_least32_t *buf, size_t… | |
/* N1 */ | |
sequence_end = SIZE_MAX; | |
sequence_prop = NUM_BIDI_PROPS; | |
- isolate_runner_init(buf, buflen, off, paragraph_level, false, &ir); | |
- while (!isolate_runner_advance(&ir)) { | |
+ ir_init(buf, buflen, off, paragraph_level, false, &ir); | |
+ while (!ir_advance(&ir)) { | |
if (sequence_end == SIZE_MAX) { | |
- if (ir.cur.prop == BIDI_PROP_B || | |
- ir.cur.prop == BIDI_PROP_S || | |
- ir.cur.prop == BIDI_PROP_WS || | |
- ir.cur.prop == BIDI_PROP_ON || | |
- ir.cur.prop == BIDI_PROP_FSI || | |
- ir.cur.prop == BIDI_PROP_LRI || | |
- ir.cur.prop == BIDI_PROP_RLI || | |
- ir.cur.prop == BIDI_PROP_PDI) { | |
+ prop = ir_get_current_prop(&ir); | |
+ | |
+ if (prop == BIDI_PROP_B || prop == BIDI_PROP_S || | |
+ prop == BIDI_PROP_WS || prop == BIDI_PROP_ON || | |
+ prop == BIDI_PROP_FSI || prop == BIDI_PROP_LRI || | |
+ prop == BIDI_PROP_RLI || prop == BIDI_PROP_PDI) { | |
/* the current character is an NI (neutral or | |
* isolate) */ | |
/* scan ahead to the end of the NI-sequence */ | |
- isolate_runner_init(buf, buflen, ir.cur.off, | |
- paragraph_level, | |
- (ir.cur.off > off), &tmp); | |
- while (!isolate_runner_advance(&tmp)) { | |
- if (tmp.next.prop != BIDI_PROP_B && | |
- tmp.next.prop != BIDI_PROP_S && | |
- tmp.next.prop != BIDI_PROP_WS && | |
- tmp.next.prop != BIDI_PROP_ON && | |
- tmp.next.prop != BIDI_PROP_FSI && | |
- tmp.next.prop != BIDI_PROP_LRI && | |
- tmp.next.prop != BIDI_PROP_RLI && | |
- tmp.next.prop != BIDI_PROP_PDI) { | |
+ ir_init(buf, buflen, ir.cur.off, | |
+ paragraph_level, (ir.cur.off > off), | |
+ &tmp); | |
+ while (!ir_advance(&tmp)) { | |
+ prop = ir_get_next_prop(&tmp); | |
+ | |
+ if (prop != BIDI_PROP_B && | |
+ prop != BIDI_PROP_S && | |
+ prop != BIDI_PROP_WS && | |
+ prop != BIDI_PROP_ON && | |
+ prop != BIDI_PROP_FSI && | |
+ prop != BIDI_PROP_LRI && | |
+ prop != BIDI_PROP_RLI && | |
+ prop != BIDI_PROP_PDI) { | |
break; | |
} | |
} | |
@@ -502,16 +539,22 @@ process_isolating_run_sequence(int_least32_t *buf, size_t… | |
* check what follows and see if the text has | |
* the same direction on both sides | |
*/ | |
- if (ir.prev.prop == BIDI_PROP_L && | |
- tmp.next.prop == BIDI_PROP_L) { | |
+ if (ir_get_previous_prop(&ir) == BIDI_PROP_L && | |
+ ir_get_next_prop(&tmp) == BIDI_PROP_L) { | |
sequence_end = tmp.cur.off; | |
sequence_prop = BIDI_PROP_L; | |
- } else if ((ir.prev.prop == BIDI_PROP_R || | |
- ir.prev.prop == BIDI_PROP_EN || | |
- ir.prev.prop == BIDI_PROP_AN) && | |
- (tmp.next.prop == BIDI_PROP_R || | |
- tmp.next.prop == BIDI_PROP_EN || | |
- tmp.next.prop == BIDI_PROP_AN)) { | |
+ } else if ((ir_get_previous_prop(&ir) == | |
+ BIDI_PROP_R || | |
+ ir_get_previous_prop(&ir) == | |
+ BIDI_PROP_EN || | |
+ ir_get_previous_prop(&ir) == | |
+ BIDI_PROP_AN) && | |
+ (ir_get_next_prop(&tmp) == | |
+ BIDI_PROP_R || | |
+ ir_get_next_prop(&tmp) == | |
+ BIDI_PROP_EN || | |
+ ir_get_next_prop(&tmp) == | |
+ BIDI_PROP_AN)) { | |
sequence_end = tmp.cur.off; | |
sequence_prop = BIDI_PROP_R; | |
} | |
@@ -520,8 +563,7 @@ process_isolating_run_sequence(int_least32_t *buf, size_t b… | |
if (sequence_end != SIZE_MAX) { | |
if (ir.cur.off <= sequence_end) { | |
- isolate_runner_set_current_prop(&ir, | |
- sequence_prop); | |
+ ir_set_current_prop(&ir, sequence_prop); | |
} else { | |
/* end of sequence, reset */ | |
sequence_end = SIZE_MAX; | |
@@ -531,24 +573,21 @@ process_isolating_run_sequence(int_least32_t *buf, size_t… | |
} | |
/* N2 */ | |
- isolate_runner_init(buf, buflen, off, paragraph_level, false, &ir); | |
- while (!isolate_runner_advance(&ir)) { | |
- if (ir.cur.prop == BIDI_PROP_B || ir.cur.prop == BIDI_PROP_S || | |
- ir.cur.prop == BIDI_PROP_WS || | |
- ir.cur.prop == BIDI_PROP_ON || | |
- ir.cur.prop == BIDI_PROP_FSI || | |
- ir.cur.prop == BIDI_PROP_LRI || | |
- ir.cur.prop == BIDI_PROP_RLI || | |
- ir.cur.prop == BIDI_PROP_PDI) { | |
+ ir_init(buf, buflen, off, paragraph_level, false, &ir); | |
+ while (!ir_advance(&ir)) { | |
+ prop = ir_get_current_prop(&ir); | |
+ | |
+ if (prop == BIDI_PROP_B || prop == BIDI_PROP_S || | |
+ prop == BIDI_PROP_WS || prop == BIDI_PROP_ON || | |
+ prop == BIDI_PROP_FSI || prop == BIDI_PROP_LRI || | |
+ prop == BIDI_PROP_RLI || prop == BIDI_PROP_PDI) { | |
/* N2 */ | |
- if (ir.cur.level % 2 == 0) { | |
+ if (ir_get_current_level(&ir) % 2 == 0) { | |
/* even embedding level */ | |
- isolate_runner_set_current_prop(&ir, | |
- BIDI_PROP_L); | |
+ ir_set_current_prop(&ir, BIDI_PROP_L); | |
} else { | |
/* odd embedding level */ | |
- isolate_runner_set_current_prop(&ir, | |
- BIDI_PROP_R); | |
+ ir_set_current_prop(&ir, BIDI_PROP_R); | |
} | |
} | |
} | |
@@ -561,7 +600,7 @@ get_paragraph_level(enum grapheme_bidirectional_override ov… | |
bool terminate_on_pdi, const int_least32_t *buf, | |
size_t buflen) | |
{ | |
- struct state s; | |
+ enum bidi_property prop; | |
int_least8_t isolate_level; | |
size_t bufoff; | |
@@ -575,9 +614,9 @@ get_paragraph_level(enum grapheme_bidirectional_override ov… | |
/* determine paragraph level (rules P1-P3) */ | |
for (bufoff = 0, isolate_level = 0; bufoff < buflen; bufoff++) { | |
- state_deserialize(buf[bufoff], &s); | |
+ prop = (uint_least8_t)get_state(STATE_PROP, buf[bufoff]); | |
- if (s.prop == BIDI_PROP_PDI && isolate_level == 0 && | |
+ if (prop == BIDI_PROP_PDI && isolate_level == 0 && | |
terminate_on_pdi) { | |
/* | |
* we are in a FSI-subsection of a paragraph and | |
@@ -587,12 +626,12 @@ get_paragraph_level(enum grapheme_bidirectional_override … | |
} | |
/* BD8/BD9 */ | |
- if ((s.prop == BIDI_PROP_LRI || s.prop == BIDI_PROP_RLI || | |
- s.prop == BIDI_PROP_FSI) && | |
+ if ((prop == BIDI_PROP_LRI || prop == BIDI_PROP_RLI || | |
+ prop == BIDI_PROP_FSI) && | |
isolate_level < MAX_DEPTH) { | |
/* we hit an isolate initiator, increment counter */ | |
isolate_level++; | |
- } else if (s.prop == BIDI_PROP_PDI && isolate_level > 0) { | |
+ } else if (prop == BIDI_PROP_PDI && isolate_level > 0) { | |
isolate_level--; | |
} | |
@@ -602,9 +641,9 @@ get_paragraph_level(enum grapheme_bidirectional_override ov… | |
} | |
/* P3 */ | |
- if (s.prop == BIDI_PROP_L) { | |
+ if (prop == BIDI_PROP_L) { | |
return 0; | |
- } else if (s.prop == BIDI_PROP_AL || s.prop == BIDI_PROP_R) { | |
+ } else if (prop == BIDI_PROP_AL || prop == BIDI_PROP_R) { | |
return 1; | |
} | |
} | |
@@ -616,8 +655,8 @@ static void | |
get_paragraph_embedding_levels(enum grapheme_bidirectional_override override, | |
int_least32_t *buf, size_t buflen) | |
{ | |
- enum bidi_property tmp_prop; | |
- struct state s, t; | |
+ enum bidi_property prop; | |
+ int_least8_t level; | |
struct { | |
int_least8_t level; | |
@@ -639,10 +678,9 @@ get_paragraph_embedding_levels(enum grapheme_bidirectional… | |
valid_isolate_count = 0; | |
for (bufoff = 0; bufoff < buflen; bufoff++) { | |
- state_deserialize(buf[bufoff], &s); | |
- tmp_prop = s.prop; | |
+ prop = (uint_least8_t)get_state(STATE_PROP, buf[bufoff]); | |
again: | |
- if (tmp_prop == BIDI_PROP_RLE) { | |
+ if (prop == BIDI_PROP_RLE) { | |
/* X2 */ | |
if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= | |
MAX_DEPTH && | |
@@ -661,7 +699,7 @@ again: | |
overflow_embedding_count += | |
(overflow_isolate_count == 0); | |
} | |
- } else if (tmp_prop == BIDI_PROP_LRE) { | |
+ } else if (prop == BIDI_PROP_LRE) { | |
/* X3 */ | |
if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= | |
MAX_DEPTH && | |
@@ -680,7 +718,7 @@ again: | |
overflow_embedding_count += | |
(overflow_isolate_count == 0); | |
} | |
- } else if (tmp_prop == BIDI_PROP_RLO) { | |
+ } else if (prop == BIDI_PROP_RLO) { | |
/* X4 */ | |
if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= | |
MAX_DEPTH && | |
@@ -699,7 +737,7 @@ again: | |
overflow_embedding_count += | |
(overflow_isolate_count == 0); | |
} | |
- } else if (tmp_prop == BIDI_PROP_LRO) { | |
+ } else if (prop == BIDI_PROP_LRO) { | |
/* X5 */ | |
if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= | |
MAX_DEPTH && | |
@@ -718,17 +756,18 @@ again: | |
overflow_embedding_count += | |
(overflow_isolate_count == 0); | |
} | |
- } else if (tmp_prop == BIDI_PROP_RLI) { | |
+ } else if (prop == BIDI_PROP_RLI) { | |
/* X5a */ | |
- s.level = dirstat->level; | |
+ set_state(STATE_LEVEL, dirstat->level, &(buf[bufoff])); | |
if (dirstat->override == | |
GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { | |
- s.prop = BIDI_PROP_L; | |
+ set_state(STATE_PROP, BIDI_PROP_L, | |
+ &(buf[bufoff])); | |
} else if (dirstat->override == | |
GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) { | |
- s.prop = BIDI_PROP_R; | |
+ set_state(STATE_PROP, BIDI_PROP_R, | |
+ &(buf[bufoff])); | |
} | |
- state_serialize(&s, &(buf[bufoff])); | |
if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= | |
MAX_DEPTH && | |
@@ -748,17 +787,18 @@ again: | |
/* overflow RLI */ | |
overflow_isolate_count++; | |
} | |
- } else if (tmp_prop == BIDI_PROP_LRI) { | |
+ } else if (prop == BIDI_PROP_LRI) { | |
/* X5b */ | |
- s.level = dirstat->level; | |
+ set_state(STATE_LEVEL, dirstat->level, &(buf[bufoff])); | |
if (dirstat->override == | |
GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { | |
- s.prop = BIDI_PROP_L; | |
+ set_state(STATE_PROP, BIDI_PROP_L, | |
+ &(buf[bufoff])); | |
} else if (dirstat->override == | |
GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) { | |
- s.prop = BIDI_PROP_R; | |
+ set_state(STATE_PROP, BIDI_PROP_R, | |
+ &(buf[bufoff])); | |
} | |
- state_serialize(&s, &(buf[bufoff])); | |
if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= | |
MAX_DEPTH && | |
@@ -778,33 +818,32 @@ again: | |
/* overflow LRI */ | |
overflow_isolate_count++; | |
} | |
- } else if (tmp_prop == BIDI_PROP_FSI) { | |
+ } else if (prop == BIDI_PROP_FSI) { | |
/* X5c */ | |
if (get_paragraph_level( | |
GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL, | |
true, buf + (bufoff + 1), | |
buflen - (bufoff + 1)) == 1) { | |
- tmp_prop = BIDI_PROP_RLI; | |
+ prop = BIDI_PROP_RLI; | |
goto again; | |
} else { /* ... == 0 */ | |
- tmp_prop = BIDI_PROP_LRI; | |
+ prop = BIDI_PROP_LRI; | |
goto again; | |
} | |
- } else if (tmp_prop != BIDI_PROP_B && | |
- tmp_prop != BIDI_PROP_BN && | |
- tmp_prop != BIDI_PROP_PDF && | |
- tmp_prop != BIDI_PROP_PDI) { | |
+ } else if (prop != BIDI_PROP_B && prop != BIDI_PROP_BN && | |
+ prop != BIDI_PROP_PDF && prop != BIDI_PROP_PDI) { | |
/* X6 */ | |
- s.level = dirstat->level; | |
+ set_state(STATE_LEVEL, dirstat->level, &(buf[bufoff])); | |
if (dirstat->override == | |
GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { | |
- s.prop = BIDI_PROP_L; | |
+ set_state(STATE_PROP, BIDI_PROP_L, | |
+ &(buf[bufoff])); | |
} else if (dirstat->override == | |
GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) { | |
- s.prop = BIDI_PROP_R; | |
+ set_state(STATE_PROP, BIDI_PROP_R, | |
+ &(buf[bufoff])); | |
} | |
- state_serialize(&s, &(buf[bufoff])); | |
- } else if (tmp_prop == BIDI_PROP_PDI) { | |
+ } else if (prop == BIDI_PROP_PDI) { | |
/* X6a */ | |
if (overflow_isolate_count > 0) { | |
/* PDI matches an overflow isolate initiator */ | |
@@ -844,16 +883,17 @@ again: | |
valid_isolate_count--; | |
} | |
- s.level = dirstat->level; | |
+ set_state(STATE_LEVEL, dirstat->level, &(buf[bufoff])); | |
if (dirstat->override == | |
GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { | |
- s.prop = BIDI_PROP_L; | |
+ set_state(STATE_PROP, BIDI_PROP_L, | |
+ &(buf[bufoff])); | |
} else if (dirstat->override == | |
GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) { | |
- s.prop = BIDI_PROP_R; | |
+ set_state(STATE_PROP, BIDI_PROP_R, | |
+ &(buf[bufoff])); | |
} | |
- state_serialize(&s, &(buf[bufoff])); | |
- } else if (tmp_prop == BIDI_PROP_PDF) { | |
+ } else if (prop == BIDI_PROP_PDF) { | |
/* X7 */ | |
if (overflow_isolate_count > 0) { | |
/* do nothing */ | |
@@ -863,25 +903,23 @@ again: | |
dirstat > directional_status) { | |
dirstat--; | |
} | |
- } else if (tmp_prop == BIDI_PROP_B) { | |
+ } else if (prop == BIDI_PROP_B) { | |
/* X8 */ | |
- s.level = (int_least8_t)paragraph_level; | |
- state_serialize(&s, &(buf[bufoff])); | |
+ set_state(STATE_LEVEL, paragraph_level, &(buf[bufoff])); | |
} | |
/* X9 */ | |
- if (tmp_prop == BIDI_PROP_RLE || tmp_prop == BIDI_PROP_LRE || | |
- tmp_prop == BIDI_PROP_RLO || tmp_prop == BIDI_PROP_LRO || | |
- tmp_prop == BIDI_PROP_PDF || tmp_prop == BIDI_PROP_BN) { | |
- s.level = -1; | |
- state_serialize(&s, &(buf[bufoff])); | |
+ if (prop == BIDI_PROP_RLE || prop == BIDI_PROP_LRE || | |
+ prop == BIDI_PROP_RLO || prop == BIDI_PROP_LRO || | |
+ prop == BIDI_PROP_PDF || prop == BIDI_PROP_BN) { | |
+ set_state(STATE_LEVEL, -1, &(buf[bufoff])); | |
} | |
} | |
/* X10 (W1-W7, N0-N2) */ | |
for (bufoff = 0; bufoff < buflen; bufoff++) { | |
- state_deserialize(buf[bufoff], &s); | |
- if (!s.visited && s.level != -1) { | |
+ if (get_state(STATE_VISITED, buf[bufoff]) == 0 && | |
+ get_state(STATE_LEVEL, buf[bufoff]) != -1) { | |
bufoff += process_isolating_run_sequence( | |
buf, buflen, bufoff, paragraph_level); | |
} | |
@@ -892,52 +930,53 @@ again: | |
* isolating run sequences, we apply this rule separately) | |
*/ | |
for (bufoff = 0; bufoff < buflen; bufoff++) { | |
- state_deserialize(buf[bufoff], &s); | |
+ level = (int_least8_t)get_state(STATE_LEVEL, buf[bufoff]); | |
+ prop = (uint_least8_t)get_state(STATE_PROP, buf[bufoff]); | |
- if (s.level % 2 == 0) { | |
+ if (level % 2 == 0) { | |
/* even level */ | |
- if (s.prop == BIDI_PROP_R) { | |
- s.level += 1; | |
- } else if (s.prop == BIDI_PROP_AN || | |
- s.prop == BIDI_PROP_EN) { | |
- s.level += 2; | |
+ if (prop == BIDI_PROP_R) { | |
+ set_state(STATE_LEVEL, level + 1, | |
+ &(buf[bufoff])); | |
+ } else if (prop == BIDI_PROP_AN || | |
+ prop == BIDI_PROP_EN) { | |
+ set_state(STATE_LEVEL, level + 2, | |
+ &(buf[bufoff])); | |
} | |
} else { | |
/* odd level */ | |
- if (s.prop == BIDI_PROP_L || s.prop == BIDI_PROP_EN || | |
- s.prop == BIDI_PROP_AN) { | |
- s.level += 1; | |
+ if (prop == BIDI_PROP_L || prop == BIDI_PROP_EN || | |
+ prop == BIDI_PROP_AN) { | |
+ set_state(STATE_LEVEL, level + 1, | |
+ &(buf[bufoff])); | |
} | |
} | |
- | |
- state_serialize(&s, &(buf[bufoff])); | |
} | |
/* L1 (rules 1-3) */ | |
runsince = SIZE_MAX; | |
for (bufoff = 0; bufoff < buflen; bufoff++) { | |
- state_deserialize(buf[bufoff], &s); | |
+ level = (int_least8_t)get_state(STATE_LEVEL, buf[bufoff]); | |
+ prop = (uint_least8_t)get_state(STATE_RAWPROP, buf[bufoff]); | |
- if (s.level == -1) { | |
+ if (level == -1) { | |
/* ignored character */ | |
continue; | |
} | |
- if (s.rawprop == BIDI_PROP_WS || s.rawprop == BIDI_PROP_FSI || | |
- s.rawprop == BIDI_PROP_LRI || s.rawprop == BIDI_PROP_RLI || | |
- s.rawprop == BIDI_PROP_PDI) { | |
+ if (prop == BIDI_PROP_WS || prop == BIDI_PROP_FSI || | |
+ prop == BIDI_PROP_LRI || prop == BIDI_PROP_RLI || | |
+ prop == BIDI_PROP_PDI) { | |
if (runsince == SIZE_MAX) { | |
/* a new run has begun */ | |
runsince = bufoff; | |
} | |
- } else if (s.rawprop == BIDI_PROP_S || | |
- s.rawprop == BIDI_PROP_B) { | |
+ } else if (prop == BIDI_PROP_S || prop == BIDI_PROP_B) { | |
/* L1.4 -- ignored for now, < beachten! */ | |
for (i = runsince; i < bufoff; i++) { | |
- state_deserialize(buf[i], &t); | |
- if (t.level != -1) { | |
- t.level = (int_least8_t)paragraph_level; | |
- state_serialize(&t, &(buf[i])); | |
+ if (get_state(STATE_LEVEL, buf[i]) != -1) { | |
+ set_state(STATE_LEVEL, paragraph_level, | |
+ &(buf[i])); | |
} | |
} | |
runsince = SIZE_MAX; | |
@@ -946,9 +985,8 @@ again: | |
runsince = SIZE_MAX; | |
} | |
- if (s.rawprop == BIDI_PROP_S || s.rawprop == BIDI_PROP_B) { | |
- s.level = (int_least8_t)paragraph_level; | |
- state_serialize(&s, &(buf[bufoff])); | |
+ if (prop == BIDI_PROP_S || prop == BIDI_PROP_B) { | |
+ set_state(STATE_LEVEL, paragraph_level, &(buf[bufoff])); | |
} | |
continue; | |
} | |
@@ -958,22 +996,41 @@ again: | |
* are in a run | |
*/ | |
for (i = runsince; i < buflen; i++) { | |
- state_deserialize(buf[i], &s); | |
- if (s.level != -1) { | |
- s.level = (int_least8_t)paragraph_level; | |
+ if (get_state(STATE_LEVEL, buf[i]) != -1) { | |
+ set_state(STATE_LEVEL, paragraph_level, | |
+ &(buf[i])); | |
} | |
- state_serialize(&s, &(buf[i])); | |
} | |
runsince = SIZE_MAX; | |
} | |
} | |
+static inline uint_least8_t | |
+get_bidi_property(uint_least32_t cp) | |
+{ | |
+ if (likely(cp <= 0x10FFFF)) { | |
+ return (bidi_minor[bidi_major[cp >> 8] + (cp & 0xff)]) & | |
+ 0x1F /* 00011111 */; | |
+ } else { | |
+ return BIDI_PROP_L; | |
+ } | |
+} | |
+ | |
+static inline uint_least8_t | |
+get_bidi_bracket_off(uint_least32_t cp) | |
+{ | |
+ if (likely(cp <= 0x10FFFF)) { | |
+ return (bidi_minor[bidi_major[cp >> 8] + (cp & 0xff)]) >> 5; | |
+ } else { | |
+ return 0; | |
+ } | |
+} | |
+ | |
static size_t | |
get_embedding_levels(HERODOTUS_READER *r, | |
enum grapheme_bidirectional_override override, | |
int_least32_t *buf, size_t buflen) | |
{ | |
- struct state s; | |
size_t bufoff, bufsize, lastparoff; | |
uint_least32_t cp; | |
@@ -1001,20 +1058,24 @@ get_embedding_levels(HERODOTUS_READER *r, | |
* the iteration to be able to give a good | |
* return value | |
*/ | |
- s.paragraph_level = 0; | |
- s.level = 0; | |
- s.prop = get_bidi_property(cp); | |
- s.bracket = bidi_bracket + get_bidi_bracket_off(cp); | |
- s.visited = 0; | |
- s.rawprop = get_bidi_property(cp); | |
- state_serialize(&s, &(buf[bufoff])); | |
+ set_state(STATE_PROP, | |
+ (uint_least8_t)get_bidi_property(cp), | |
+ &(buf[bufoff])); | |
+ set_state(STATE_BRACKET_OFF, get_bidi_bracket_off(cp), | |
+ &(buf[bufoff])); | |
+ set_state(STATE_LEVEL, 0, &(buf[bufoff])); | |
+ set_state(STATE_PARAGRAPH_LEVEL, 0, &(buf[bufoff])); | |
+ set_state(STATE_VISITED, 0, &(buf[bufoff])); | |
+ set_state(STATE_RAWPROP, | |
+ (uint_least8_t)get_bidi_property(cp), | |
+ &(buf[bufoff])); | |
} | |
} | |
bufsize = herodotus_reader_number_read(r); | |
for (bufoff = 0, lastparoff = 0; bufoff < bufsize; bufoff++) { | |
- state_deserialize(buf[bufoff], &s); | |
- if (s.prop != BIDI_PROP_B && bufoff != bufsize - 1) { | |
+ if (get_state(STATE_PROP, buf[bufoff]) != BIDI_PROP_B && | |
+ bufoff != bufsize - 1) { | |
continue; | |
} | |
@@ -1032,8 +1093,7 @@ get_embedding_levels(HERODOTUS_READER *r, | |
/* bake the levels into the buffer, discarding the metadata */ | |
for (bufoff = 0; bufoff < bufsize; bufoff++) { | |
- state_deserialize(buf[bufoff], &s); | |
- buf[bufoff] = s.level; | |
+ buf[bufoff] = get_state(STATE_LEVEL, buf[bufoff]); | |
} | |
/* |