| awk: split record into runes for empty FS (#292) - plan9port - [fork] Plan 9 f… | |
| git clone git://src.adamsgaard.dk/plan9port | |
| Log | |
| Files | |
| Refs | |
| README | |
| LICENSE | |
| --- | |
| commit 1309450668aa571dee97f4373f9555b4fddcf1aa | |
| parent 715807d706cd13bc583588477a84090fbf02e057 | |
| Author: Fazlul Shahriar <[email protected]> | |
| Date: Tue, 29 Oct 2019 10:04:06 -0400 | |
| awk: split record into runes for empty FS (#292) | |
| awk was splitting records into bytes instead of runes for empty FS. | |
| For example, this was printing only the first byte of the UTF-8 encoding | |
| of é: | |
| echo é | awk 'BEGIN{FS=""}{print $1}' | |
| The change just copies how the `split` function handles runes. | |
| Originally reported by kris on twitter: | |
| https://twitter.com/p9luv/status/1180436083433201665 | |
| Diffstat: | |
| M src/cmd/awk/lib.c | 13 +++++++++---- | |
| 1 file changed, 9 insertions(+), 4 deletions(-) | |
| --- | |
| diff --git a/src/cmd/awk/lib.c b/src/cmd/awk/lib.c | |
| @@ -29,6 +29,7 @@ THIS SOFTWARE. | |
| #include <errno.h> | |
| #include <stdlib.h> | |
| #include <stdarg.h> | |
| +#include <utf.h> | |
| #include "awk.h" | |
| #include "y.tab.h" | |
| @@ -293,15 +294,19 @@ void fldbld(void) /* create fields from current r… | |
| } | |
| *fr = 0; | |
| } else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 … | |
| - for (i = 0; *r != 0; r++) { | |
| - char buf[2]; | |
| + int nb; | |
| + for (i = 0; *r != 0; r += nb) { | |
| + Rune rr; | |
| + char buf[UTFmax+1]; | |
| + | |
| i++; | |
| if (i > nfields) | |
| growfldtab(i); | |
| if (freeable(fldtab[i])) | |
| xfree(fldtab[i]->sval); | |
| - buf[0] = *r; | |
| - buf[1] = 0; | |
| + nb = chartorune(&rr, r); | |
| + memmove(buf, r, nb); | |
| + buf[nb] = '\0'; | |
| fldtab[i]->sval = tostring(buf); | |
| fldtab[i]->tval = FLD | STR; | |
| } |