GopherProxy

	awk: split record into runes for empty FS (#292) - plan9port - [fork] Plan 9 f…
	git clone git://src.adamsgaard.dk/plan9port
	Log
	Files
	Refs
	README
	LICENSE
	---
	commit 1309450668aa571dee97f4373f9555b4fddcf1aa
	parent 715807d706cd13bc583588477a84090fbf02e057
	Author: Fazlul Shahriar <[email protected]>
	Date: Tue, 29 Oct 2019 10:04:06 -0400

	awk: split record into runes for empty FS (#292)

	awk was splitting records into bytes instead of runes for empty FS.
	For example, this was printing only the first byte of the utf-8 encoding
	of é:

	echo é | awk 'BEGIN{FS=""}{print $1}'

	The change just copies how the `split` function handles runes.

	Originally reported by kris on twitter:
	https://twitter.com/p9luv/status/1180436083433201665
	Diffstat:
	M src/cmd/awk/lib.c | 13 +++++++++----

	1 file changed, 9 insertions(+), 4 deletions(-)
	---
	diff --git a/src/cmd/awk/lib.c b/src/cmd/awk/lib.c
	@@ -29,6 +29,7 @@ THIS SOFTWARE.
	#include <errno.h>
	#include <stdlib.h>
	#include <stdarg.h>
	+#include <utf.h>
	#include "awk.h"
	#include "y.tab.h"

	@@ -293,15 +294,19 @@ void fldbld(void) /* create fields from current r…
	}
	*fr = 0;
	} else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 …
	- for (i = 0; *r != 0; r++) {
	- char buf[2];
	+ int nb;
	+ for (i = 0; *r != 0; r += nb) {
	+ Rune rr;
	+ char buf[UTFmax+1];
	+
	i++;
	if (i > nfields)
	growfldtab(i);
	if (freeable(fldtab[i]))
	xfree(fldtab[i]->sval);
	- buf[0] = *r;
	- buf[1] = 0;
	+ nb = chartorune(&rr, r);
	+ memmove(buf, r, nb);
	+ buf[nb] = '\0';
	fldtab[i]->sval = tostring(buf);
	fldtab[i]->tval = FLD | STR;
	}