| word.c - libgrapheme - unicode string library | |
| git clone git://git.suckless.org/libgrapheme | |
| Log | |
| Files | |
| Refs | |
| README | |
| LICENSE | |
| --- | |
| word.c (3038B) | |
| --- | |
| 1 /* See LICENSE file for copyright and license details. */ | |
| 2 #include <stdio.h> | |
| 3 #include <stdlib.h> | |
| 4 #include <string.h> | |
| 5 | |
| 6 #include "util.h" | |
| 7 | |
| 8 #define FILE_EMOJI "data/emoji-data.txt" | |
| 9 #define FILE_WORD "data/WordBreakProperty.txt" | |
| 10 | |
| 11 static const struct property_spec word_break_property[] = { | |
| 12 { | |
| 13 .enumname = "OTHER", | |
| 14 .file = NULL, | |
| 15 .ucdname = NULL, | |
| 16 }, | |
| 17 { | |
| 18 .enumname = "ALETTER", | |
| 19 .file = FILE_WORD, | |
| 20 .ucdname = "ALetter", | |
| 21 }, | |
| 22 { | |
| 23 .enumname = "BOTH_ALETTER_EXTPICT", | |
| 24 .file = NULL, | |
| 25 .ucdname = NULL, | |
| 26 }, | |
| 27 { | |
| 28 .enumname = "CR", | |
| 29 .file = FILE_WORD, | |
| 30 .ucdname = "CR", | |
| 31 }, | |
| 32 { | |
| 33 .enumname = "DOUBLE_QUOTE", | |
| 34 .file = FILE_WORD, | |
| 35 .ucdname = "Double_Quote", | |
| 36 }, | |
| 37 { | |
| 38 .enumname = "EXTEND", | |
| 39 .file = FILE_WORD, | |
| 40 .ucdname = "Extend", | |
| 41 }, | |
| 42 { | |
| 43 .enumname = "EXTENDED_PICTOGRAPHIC", | |
| 44 .file = FILE_EMOJI, | |
| 45 .ucdname = "Extended_Pictographic", | |
| 46 }, | |
| 47 { | |
| 48 .enumname = "EXTENDNUMLET", | |
| 49 .file = FILE_WORD, | |
| 50 .ucdname = "ExtendNumLet", | |
| 51 }, | |
| 52 { | |
| 53 .enumname = "FORMAT", | |
| 54 .file = FILE_WORD, | |
| 55 .ucdname = "Format", | |
| 56 }, | |
| 57 { | |
| 58 .enumname = "HEBREW_LETTER", | |
| 59 .file = FILE_WORD, | |
| 60 .ucdname = "Hebrew_Letter", | |
| 61 }, | |
| 62 { | |
| 63 .enumname = "KATAKANA", | |
| 64 .file = FILE_WORD, | |
| 65 .ucdname = "Katakana", | |
| 66 }, | |
| 67 { | |
| 68 .enumname = "LF", | |
| 69 .file = FILE_WORD, | |
| 70 .ucdname = "LF", | |
| 71 }, | |
| 72 { | |
| 73 .enumname = "MIDLETTER", | |
| 74 .file = FILE_WORD, | |
| 75 .ucdname = "MidLetter", | |
| 76 }, | |
| 77 { | |
| 78 .enumname = "MIDNUM", | |
| 79 .file = FILE_WORD, | |
| 80 .ucdname = "MidNum", | |
| 81 }, | |
| 82 { | |
| 83 .enumname = "MIDNUMLET", | |
| 84 .file = FILE_WORD, | |
| 85 .ucdname = "MidNumLet", | |
| 86 }, | |
| 87 { | |
| 88 .enumname = "NEWLINE", | |
| 89 .file = FILE_WORD, | |
| 90 .ucdname = "Newline", | |
| 91 }, | |
| 92 { | |
| 93 .enumname = "NUMERIC", | |
| 94 .file = FILE_WORD, | |
| 95 .ucdname = "Numeric", | |
| 96 }, | |
| 97 { | |
| 98 .enumname = "REGIONAL_INDICATOR", | |
| 99 .file = FILE_WORD, | |
| 100 .ucdname = "Regional_Indicator", | |
| 101 }, | |
| 102 { | |
| 103 .enumname = "SINGLE_QUOTE", | |
| 104 .file = FILE_WORD, | |
| 105 .ucdname = "Single_Quote", | |
| 106 }, | |
| 107 { | |
| 108 .enumname = "WSEGSPACE", | |
| 109 .file = FILE_WORD, | |
| 110 .ucdname = "WSegSpace", | |
| 111 }, | |
| 112 { | |
| 113 .enumname = "ZWJ", | |
| 114 .file = FILE_WORD, | |
| 115 .ucdname = "ZWJ", | |
| 116 }, | |
| 117 }; | |
| 118 | |
/*
 * Resolve a code point that matched two word-break properties.
 *
 * prop1 and prop2 are indices into word_break_property[]; cp is
 * accepted but unused.  main() passes this function as a callback to
 * properties_generate_break_property().
 *
 * The only pair handled is ALETTER together with EXTENDED_PICTOGRAPHIC,
 * in either order: the table is searched by name for the
 * "BOTH_ALETTER_EXTPICT" entry and its index is returned.  If that
 * entry is missing from the table, or if the pair is any other
 * combination, a message is written to stderr and the program exits
 * with status 1.
 */
| 119 static uint_least8_t | |
| 120 handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t pr… | |
| 121 { | |
| 122 uint_least8_t result; | |
| 123 | |
| 124 (void)cp; | |
| 125 | |
| 126 if ((!strcmp(word_break_property[prop1].enumname, "ALETTER") && | |
| 127 !strcmp(word_break_property[prop2].enumname, | |
| 128 "EXTENDED_PICTOGRAPHIC")) || | |
| 129 (!strcmp(word_break_property[prop1].enumname, | |
| 130 "EXTENDED_PICTOGRAPHIC") && | |
| 131 !strcmp(word_break_property[prop2].enumname, "ALETTER"))) { | |
| 132 for (result = 0; result < LEN(word_break_property); resu… | |
| 133 if (!strcmp(word_break_property[result].enumname, | |
| 134 "BOTH_ALETTER_EXTPICT")) { | |
| 135 break; | |
| 136 } | |
| 137 } | |
| 138 if (result == LEN(word_break_property)) { | |
| 139 fprintf(stderr, "handle_conflict: Internal error… | |
| 140 exit(1); | |
| 141 } | |
| 142 } else { | |
| 143 fprintf(stderr, "handle_conflict: Cannot handle conflict… | |
| 144 exit(1); | |
| 145 } | |
| 146 | |
| 147 return result; | |
| 148 } | |
| 149 | |
| 150 int | |
| 151 main(int argc, char *argv[]) | |
| 152 { | |
| 153 (void)argc; | |
| 154 | |
| 155 properties_generate_break_property( | |
| 156 word_break_property, LEN(word_break_property), NULL, | |
| 157 handle_conflict, NULL, "word_break", argv[0]); | |
| 158 | |
| 159 return 0; | |
| 160 } |