Introduction
Introduction Statistics Contact Development Disclaimer Help
Refactor state into unsigned integer - libgrapheme - unicode string library
git clone git://git.suckless.org/libgrapheme
Log
Files
Refs
README
LICENSE
---
commit ea1be565ad117a3e9846ae0e855d41021d94ee8a
parent f517655a98a155694cf57c180531724baa081c26
Author: Laslo Hunhold <[email protected]>
Date: Mon, 21 Nov 2022 11:05:26 +0100
Refactor state into unsigned integer
Now that we have separated the level determination itself, there
is no need to store the state in a signed integer. Using an
unsigned integer simplifies the masking.
Diffstat:
M grapheme.h | 6 +++---
M src/bidirectional.c | 122 +++++++++--------------------…
M test/bidirectional.c | 2 +-
3 files changed, 37 insertions(+), 93 deletions(-)
---
diff --git a/grapheme.h b/grapheme.h
@@ -16,14 +16,14 @@ enum grapheme_bidirectional_override {
};
void grapheme_bidirectional_get_line_embedding_levels(
- const int_least32_t *, size_t, int_least8_t *);
+ const uint_least32_t *, size_t, int_least8_t *);
size_t grapheme_bidirectional_preprocess(
const uint_least32_t *, size_t, enum grapheme_bidirectional_override,
- int_least32_t *, size_t);
+ uint_least32_t *, size_t);
size_t grapheme_bidirectional_preprocess_utf8(
const char *, size_t, enum grapheme_bidirectional_override,
- int_least32_t *, size_t);
+ uint_least32_t *, size_t);
size_t grapheme_bidirectional_reorder_line(
const uint_least32_t *, const int_least8_t *, size_t,
diff --git a/src/bidirectional.c b/src/bidirectional.c
@@ -8,127 +8,71 @@
#define MAX_DEPTH 125
-#if 0
enum state_type {
STATE_PROP, /* in 0..23, bidi_property */
+ STATE_PRESERVED_PROP, /* in 0..23, preserved bidi_property for L1-rule */
STATE_BRACKET_OFF, /* in 0..255, offset in bidi_bracket */
STATE_LEVEL, /* in 0..MAX_DEPTH+1=126, embedding level */
STATE_PARAGRAPH_LEVEL, /* in 0..1, paragraph embedding level */
STATE_VISITED, /* in 0..1, visited within isolating run */
};
-/* without rawprop, as it should be */
static struct {
- int_least32_t filter_mask;
- int_least32_t clear_mask;
+ uint_least32_t filter_mask;
size_t mask_shift;
int_least16_t value_offset;
} state_lut[] = {
[STATE_PROP] = {
- .filter_mask = 0x00001F, /* 00000000 00000000 00011111 */
- .clear_mask = 0x3FFFE0, /* 00111111 11111111 11100000 */
+ .filter_mask = 0x000001F, /* 00000000 00000000 00000000 00011111 */
.mask_shift = 0,
.value_offset = 0,
},
- [STATE_BRACKET_OFF] = {
- .filter_mask = 0x001FE0, /* 00000000 00011111 11100000 */
- .clear_mask = 0x3FE01F, /* 00111111 11100000 00011111 */
+ [STATE_PRESERVED_PROP] = {
+ .filter_mask = 0x00003E0, /* 00000000 00000000 00000011 11100000 */
.mask_shift = 5,
.value_offset = 0,
},
- [STATE_LEVEL] = {
- .filter_mask = 0x0FE000, /* 00001111 11100000 00000000 */
- .clear_mask = 0x301FFF, /* 00110000 00011111 11111111 */
- .mask_shift = 13,
- .value_offset = -1,
- },
- [STATE_PARAGRAPH_LEVEL] = {
- .filter_mask = 0x100000, /* 00010000 00000000 00000000 */
- .clear_mask = 0x2FFFFF, /* 00101111 11111111 11111111 */
- .mask_shift = 20,
- .value_offset = 0,
- },
- [STATE_VISITED] = {
- .filter_mask = 0x200000, /* 00100000 00000000 00000000 */
- .clear_mask = 0x1FFFFF, /* 00011111 11111111 11111111 */
- .mask_shift = 21,
- .value_offset = 0,
- },
-};
-#endif
-
-enum state_type {
- STATE_PROP, /* in 0..23, bidi_property */
- STATE_BRACKET_OFF, /* in 0..255, offset in bidi_bracket */
- STATE_LEVEL, /* in 0..MAX_DEPTH+1=126, embedding level */
- STATE_PARAGRAPH_LEVEL, /* in 0..1, paragraph embedding level */
- STATE_VISITED, /* in 0..1, visited within isolating run */
- STATE_RAWPROP,
-};
-
-static struct {
- int_least32_t filter_mask;
- int_least32_t clear_mask;
- size_t mask_shift;
- int_least16_t value_offset;
-} state_lut[] = {
- [STATE_PROP] = {
- .filter_mask = 0x000001F, /* 00000000 00000000 00000000 00011111 */
- .clear_mask = 0x7FFFFE0, /* 00000111 11111111 11111111 11100000 */
- .mask_shift = 0,
- .value_offset = 0,
- },
[STATE_BRACKET_OFF] = {
- .filter_mask = 0x0001FE0, /* 00000000 00000000 00011111 11100000 */
- .clear_mask = 0x7FFE01F, /* 00000111 11111111 11100000 00011111 */
- .mask_shift = 5,
+ .filter_mask = 0x003FC00, /* 00000000 00000011 11111100 00000000 */
+ .mask_shift = 10,
.value_offset = 0,
},
[STATE_LEVEL] = {
- .filter_mask = 0x00FE000, /* 00000000 00001111 11100000 00000000 */
- .clear_mask = 0x7F01FFF, /* 00000111 11110000 00011111 11111111 */
- .mask_shift = 13,
+ .filter_mask = 0x1FC0000, /* 00000001 11111100 00000000 00000000 */
+ .mask_shift = 18,
.value_offset = -1,
},
[STATE_PARAGRAPH_LEVEL] = {
- .filter_mask = 0x0100000, /* 00000000 00010000 00000000 00000000 */
- .clear_mask = 0x7EFFFFF, /* 00000111 11101111 11111111 11111111 */
- .mask_shift = 20,
+ .filter_mask = 0x2000000, /* 00000010 00000000 00000000 00000000 */
+ .mask_shift = 25,
.value_offset = 0,
},
[STATE_VISITED] = {
- .filter_mask = 0x0200000, /* 00000000 00100000 00000000 00000000 */
- .clear_mask = 0x7DFFFFF, /* 00000111 11011111 11111111 11111111 */
- .mask_shift = 21,
- .value_offset = 0,
- },
- [STATE_RAWPROP] = {
- .filter_mask = 0x7C00000, /* 00000111 11000000 00000000 00000000 */
- .clear_mask = 0x03FFFFF, /* 00000000 00111111 11111111 11111111 */
- .mask_shift = 22,
+ .filter_mask = 0x4000000, /* 00000100 00000000 00000000 00000000 */
+ .mask_shift = 26,
.value_offset = 0,
},
};
static inline int_least16_t
-get_state(enum state_type t, int_least32_t input)
+get_state(enum state_type t, uint_least32_t input)
{
- return (int_least16_t)(((input & state_lut[t].filter_mask) >>
- state_lut[t].mask_shift) +
- state_lut[t].value_offset);
+ return (int_least16_t)((input & state_lut[t].filter_mask) >>
+ state_lut[t].mask_shift) +
+ state_lut[t].value_offset;
}
static inline void
-set_state(enum state_type t, int_least16_t value, int_least32_t *output)
+set_state(enum state_type t, int_least16_t value, uint_least32_t *output)
{
- *output &= state_lut[t].clear_mask;
- *output |= ((value - state_lut[t].value_offset)
+ *output &= ~state_lut[t].filter_mask;
+ *output |= ((uint_least32_t)(value - state_lut[t].value_offset)
<< state_lut[t].mask_shift) &
state_lut[t].filter_mask;
}
struct isolate_runner {
- int_least32_t *buf;
+ uint_least32_t *buf;
size_t buflen;
struct {
@@ -179,7 +123,7 @@ ir_set_current_prop(struct isolate_runner *ir, enum bidi_pr…
}
static void
-ir_init(int_least32_t *buf, size_t buflen, size_t off,
+ir_init(uint_least32_t *buf, size_t buflen, size_t off,
uint_least8_t paragraph_level, bool within, struct isolate_runner *ir)
{
size_t i;
@@ -385,7 +329,7 @@ ir_advance(struct isolate_runner *ir)
}
static size_t
-preprocess_isolating_run_sequence(int_least32_t *buf, size_t buflen, size_t off,
+preprocess_isolating_run_sequence(uint_least32_t *buf, size_t buflen, size_t off,
uint_least8_t paragraph_level)
{
enum bidi_property sequence_prop, prop;
@@ -597,7 +541,7 @@ preprocess_isolating_run_sequence(int_least32_t *buf, size_…
static uint_least8_t
get_paragraph_level(enum grapheme_bidirectional_override override,
- bool terminate_on_pdi, const int_least32_t *buf,
+ bool terminate_on_pdi, const uint_least32_t *buf,
size_t buflen)
{
enum bidi_property prop;
@@ -653,7 +597,7 @@ get_paragraph_level(enum grapheme_bidirectional_override ov…
static void
preprocess_paragraph(enum grapheme_bidirectional_override override,
- int_least32_t *buf, size_t buflen)
+ uint_least32_t *buf, size_t buflen)
{
enum bidi_property prop;
int_least8_t level;
@@ -961,7 +905,7 @@ again:
runsince = SIZE_MAX;
for (bufoff = 0; bufoff < buflen; bufoff++) {
level = (int_least8_t)get_state(STATE_LEVEL, buf[bufoff]);
- prop = (uint_least8_t)get_state(STATE_RAWPROP, buf[bufoff]);
+ prop = (uint_least8_t)get_state(STATE_PRESERVED_PROP, buf[bufoff]);
if (level == -1) {
/* ignored character */
@@ -1038,7 +982,7 @@ get_bidi_bracket_off(uint_least32_t cp)
static size_t
preprocess(HERODOTUS_READER *r,
enum grapheme_bidirectional_override override,
- int_least32_t *buf, size_t buflen)
+ uint_least32_t *buf, size_t buflen)
{
size_t bufoff, bufsize, lastparoff;
uint_least32_t cp;
@@ -1075,7 +1019,7 @@ preprocess(HERODOTUS_READER *r,
set_state(STATE_LEVEL, 0, &(buf[bufoff]));
set_state(STATE_PARAGRAPH_LEVEL, 0, &(buf[bufoff]));
set_state(STATE_VISITED, 0, &(buf[bufoff]));
- set_state(STATE_RAWPROP,
+ set_state(STATE_PRESERVED_PROP,
(uint_least8_t)get_bidi_property(cp),
&(buf[bufoff]));
}
@@ -1110,7 +1054,7 @@ preprocess(HERODOTUS_READER *r,
size_t
grapheme_bidirectional_preprocess(
const uint_least32_t *src, size_t srclen,
- enum grapheme_bidirectional_override override, int_least32_t *dest,
+ enum grapheme_bidirectional_override override, uint_least32_t *dest,
size_t destlen)
{
HERODOTUS_READER r;
@@ -1123,7 +1067,7 @@ grapheme_bidirectional_preprocess(
size_t
grapheme_bidirectional_preprocess_utf8(
const char *src, size_t srclen,
- enum grapheme_bidirectional_override override, int_least32_t *dest,
+ enum grapheme_bidirectional_override override, uint_least32_t *dest,
size_t destlen)
{
HERODOTUS_READER r;
@@ -1135,7 +1079,7 @@ grapheme_bidirectional_preprocess_utf8(
void
grapheme_bidirectional_get_line_embedding_levels(
- const int_least32_t *linedata, size_t linelen, int_least8_t *linelevel)
+ const uint_least32_t *linedata, size_t linelen, int_least8_t *linelevel)
{
enum bidi_property prop;
size_t i, runsince;
@@ -1143,7 +1087,7 @@ grapheme_bidirectional_get_line_embedding_levels(
/* rule L1.4 */
runsince = SIZE_MAX;
for (i = 0; i < linelen; i++) {
- prop = (uint_least8_t)get_state(STATE_RAWPROP, linedata[i]);
+ prop = (uint_least8_t)get_state(STATE_PRESERVED_PROP, linedata[i]);
/* write level into level array */
if ((linelevel[i] = (int_least8_t)get_state(
@@ -1171,7 +1115,7 @@ grapheme_bidirectional_get_line_embedding_levels(
*/
for (i = runsince; i < linelen; i++) {
if (linelevel[i] != -1) {
- linelevel[i] = get_state(
+ linelevel[i] = (int_least8_t)get_state(
STATE_PARAGRAPH_LEVEL, linedata[i]);
}
}
diff --git a/test/bidirectional.c b/test/bidirectional.c
@@ -12,7 +12,7 @@
int
main(int argc, char *argv[])
{
- int_least32_t data[512]; /* TODO iterate and get max, allocate */
+ uint_least32_t data[512]; /* TODO iterate and get max, allocate */
int_least8_t lev[512];
size_t i, num_tests, failed, datalen, ret, j, m;
You are viewing proxied material from suckless.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.