Introduction
Introduction Statistics Contact Development Disclaimer Help
Compile the library in freestanding mode - libgrapheme - unicode string library
git clone git://git.suckless.org/libgrapheme
Log
Files
Refs
README
LICENSE
---
commit 8a7e2ee85f0a2824e48e85e57534c5b18113cf07
parent 9f15d7eb0c9cf216f069d6972c58520013b80acb
Author: Laslo Hunhold <[email protected]>
Date: Sat, 24 Sep 2022 01:54:52 +0200
Compile the library in freestanding mode
Looking closely, we never explicitly depend on the standard library
within the actual library code. This can be explicitly expressed by
setting -ffreestanding during object-compilation and -nostdlib during
linking. The result is a clean library with zero libc-symbols, allowing
it to be used even without an operating system (kernel code, ELF,
etc.), by making use of the freestanding implementation form defined
in the standard[0].
To be freestanding, the code may only include <float.h>, <iso646.h>,
<limits.h>, <stdalign.h>, <stdarg.h>, <stdbool.h>, <stddef.h>,
<stdint.h> and <stdnoreturn.h>. We satisfy this condition implicitly,
but there are some erroneous supplementary includes that are removed
in this commit. Additionally, the strict compiler-implementation simply
adds the U-suffix to the argument of UINT16_C (et al.), which is why
calls to it have to be changed to really include only constants.
[0]:https://www.iso-9899.info/n1570.html#4.p6
Signed-off-by: Laslo Hunhold <[email protected]>
Diffstat:
M config.mk | 4 ++--
M src/character.c | 139 +++++++++++++++--------------…
M src/line.c | 2 --
M src/sentence.c | 2 --
M src/utf8.c | 3 ++-
M src/util.c | 11 +++++++++--
M src/word.c | 2 --
7 files changed, 82 insertions(+), 81 deletions(-)
---
diff --git a/config.mk b/config.mk
@@ -15,8 +15,8 @@ BUILD_CPPFLAGS = $(CPPFLAGS)
BUILD_CFLAGS = $(CFLAGS)
BUILD_LDFLAGS = $(LDFLAGS)
-SHFLAGS = -fPIC
-SOFLAGS = -shared -Wl,--soname=libgrapheme.so
+SHFLAGS = -fPIC -ffreestanding
+SOFLAGS = -shared -nostdlib -Wl,--soname=libgrapheme.so
# tools
CC = cc
diff --git a/src/character.c b/src/character.c
@@ -1,8 +1,7 @@
/* See LICENSE file for copyright and license details. */
+#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
#include "../gen/character.h"
#include "../grapheme.h"
@@ -10,96 +9,96 @@
static const uint_least16_t dont_break[NUM_CHAR_BREAK_PROPS] = {
[CHAR_BREAK_PROP_OTHER] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_CR] =
- UINT16_C(1 << CHAR_BREAK_PROP_LF), /* GB3 */
+ UINT16_C(1) << CHAR_BREAK_PROP_LF, /* GB3 */
[CHAR_BREAK_PROP_EXTEND] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_L] =
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_L) | /* GB6 */
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_V) | /* GB6 */
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_LV) | /* GB6 */
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_LVT) | /* GB6 */
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_L | /* GB6 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB6 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LV | /* GB6 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LVT | /* GB6 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_V] =
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_V) | /* GB7 */
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T) | /* GB7 */
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_T] =
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T) | /* GB8 */
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_LV] =
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_V) | /* GB7 */
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T) | /* GB7 */
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_LVT] =
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T) | /* GB8 */
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_PREPEND] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK) | /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK | /* GB9a */
(UINT16_C(0xFFFF) &
- ~(UINT16_C(1 << CHAR_BREAK_PROP_CR) |
- UINT16_C(1 << CHAR_BREAK_PROP_LF) |
- UINT16_C(1 << CHAR_BREAK_PROP_CONTROL)
+ ~(UINT16_C(1) << CHAR_BREAK_PROP_CR |
+ UINT16_C(1) << CHAR_BREAK_PROP_LF |
+ UINT16_C(1) << CHAR_BREAK_PROP_CONTROL
)
), /* GB9b */
[CHAR_BREAK_PROP_REGIONAL_INDICATOR] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_SPACINGMARK] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_ZWJ] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
};
static const uint_least16_t flag_update_gb11[2 * NUM_CHAR_BREAK_PROPS] = {
[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) |
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND),
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ |
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND,
[CHAR_BREAK_PROP_ZWJ + NUM_CHAR_BREAK_PROPS] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC),
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC,
[CHAR_BREAK_PROP_EXTEND + NUM_CHAR_BREAK_PROPS] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) |
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ),
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ,
[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC + NUM_CHAR_BREAK_PROPS] =
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) |
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND),
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ |
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND,
};
static const uint_least16_t dont_break_gb11[2 * NUM_CHAR_BREAK_PROPS] = {
[CHAR_BREAK_PROP_ZWJ + NUM_CHAR_BREAK_PROPS] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC),
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC,
};
static const uint_least16_t flag_update_gb12_13[2 * NUM_CHAR_BREAK_PROPS] = {
[CHAR_BREAK_PROP_REGIONAL_INDICATOR] =
- UINT16_C(1 << CHAR_BREAK_PROP_REGIONAL_INDICATOR),
+ UINT16_C(1) << CHAR_BREAK_PROP_REGIONAL_INDICATOR,
};
static const uint_least16_t dont_break_gb12_13[2 * NUM_CHAR_BREAK_PROPS] = {
[CHAR_BREAK_PROP_REGIONAL_INDICATOR + NUM_CHAR_BREAK_PROPS] =
- UINT16_C(1 << CHAR_BREAK_PROP_REGIONAL_INDICATOR),
+ UINT16_C(1) << CHAR_BREAK_PROP_REGIONAL_INDICATOR,
};
static inline enum char_break_property
@@ -135,23 +134,23 @@ grapheme_is_character_break(uint_least32_t cp0, uint_leas…
state->gb11_flag =
flag_update_gb11[cp0_prop + NUM_CHAR_BREAK_PROPS *
state->gb11_flag] &
- UINT16_C(1 << cp1_prop);
+ UINT16_C(1) << cp1_prop;
state->gb12_13_flag =
flag_update_gb12_13[cp0_prop + NUM_CHAR_BREAK_PROPS *
state->gb12_13_flag] &
- UINT16_C(1 << cp1_prop);
+ UINT16_C(1) << cp1_prop;
/*
* Apply grapheme cluster breaking algorithm (UAX #29), see
* http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_…
*/
- notbreak = (dont_break[cp0_prop] & UINT16_C(1 << cp1_prop)) ||
+ notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) …
(dont_break_gb11[cp0_prop + state->gb11_flag *
NUM_CHAR_BREAK_PROPS] &
- UINT16_C(1 << cp1_prop)) ||
+ (UINT16_C(1) << cp1_prop)) ||
(dont_break_gb12_13[cp0_prop + state->gb12_13_flag *
NUM_CHAR_BREAK_PROPS] &
- UINT16_C(1 << cp1_prop));
+ (UINT16_C(1) << cp1_prop));
/* update or reset flags (when we have a break) */
if (likely(!notbreak)) {
@@ -168,9 +167,9 @@ grapheme_is_character_break(uint_least32_t cp0, uint_least3…
* Given we have no state, this behaves as if the state-boolea…
* were all set to false
*/
- notbreak = (dont_break[cp0_prop] & UINT16_C(1 << cp1_prop)) ||
- (dont_break_gb11[cp0_prop] & UINT16_C(1 << cp1_prop…
- (dont_break_gb12_13[cp0_prop] & UINT16_C(1 << cp1_p…
+ notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) …
+ (dont_break_gb11[cp0_prop] & (UINT16_C(1) << cp1_pr…
+ (dont_break_gb12_13[cp0_prop] & (UINT16_C(1) << cp1…
}
return !notbreak;
diff --git a/src/line.c b/src/line.c
@@ -1,8 +1,6 @@
/* See LICENSE file for copyright and license details. */
#include <stdbool.h>
#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
#include "../gen/line.h"
#include "../grapheme.h"
diff --git a/src/sentence.c b/src/sentence.c
@@ -1,8 +1,6 @@
/* See LICENSE file for copyright and license details. */
#include <stdbool.h>
#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
#include "../gen/sentence.h"
#include "../grapheme.h"
diff --git a/src/utf8.c b/src/utf8.c
@@ -1,5 +1,6 @@
/* See LICENSE file for copyright and license details. */
-#include <stdio.h>
+#include <stddef.h>
+#include <stdint.h>
#include "../grapheme.h"
#include "util.h"
diff --git a/src/util.c b/src/util.c
@@ -1,7 +1,8 @@
/* See LICENSE file for copyright and license details. */
+#include <limits.h>
#include <stdbool.h>
+#include <stddef.h>
#include <stdint.h>
-#include <stdlib.h>
#include "../gen/types.h"
#include "../grapheme.h"
@@ -88,6 +89,12 @@ herodotus_reader_next_codepoint_break(const HERODOTUS_READER…
}
}
+size_t
+herodotus_reader_number_read(const HERODOTUS_READER *r)
+{
+ return r->off;
+}
+
enum herodotus_status
herodotus_read_codepoint(HERODOTUS_READER *r, bool advance, uint_least32_t *cp)
{
@@ -202,7 +209,7 @@ herodotus_writer_nul_terminate(HERODOTUS_WRITER *w)
}
size_t
-herodotus_writer_number_written(HERODOTUS_WRITER *w)
+herodotus_writer_number_written(const HERODOTUS_WRITER *w)
{
return w->off;
}
diff --git a/src/word.c b/src/word.c
@@ -1,8 +1,6 @@
/* See LICENSE file for copyright and license details. */
#include <stdbool.h>
#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
#include "../gen/word.h"
#include "../grapheme.h"
You are viewing proxied material from suckless.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.