GopherProxy

	sfeed_update: use xargs -P -0 - sfeed - RSS and Atom parser
	git clone git://git.codemadness.org/sfeed
	Log
	Files
	Refs
	README
	LICENSE
	---
	commit cdb8f7feb135adf6f18e389b4bbf47886089474a
	parent 62bfed65ca91c34ea24b81b191c23d4542a7075b
	Author: Hiltjo Posthuma <[email protected]>
	Date: Tue, 26 Dec 2023 15:59:39 +0100

	sfeed_update: use xargs -P -0

	Some of the options, like -P, are non-POSIX as of writing (2023):
	https://pubs.opengroup.org/onlinepubs/9699919799/utilities/xargs.html. However,
	many systems have supported this useful extension for many years now.

	Some historic context:

	The xargs -0 option was added on 1996-06-11, about a year after the NetBSD
	import (over 27 years ago at the time of writing):

	http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/usr.bin/xargs/xargs.c?rev=1.2&cont…

	On OpenBSD the xargs -P option was added on 2003-12-06 by syncing the FreeBSD
	code:

	http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/usr.bin/xargs/xargs.c?rev=1.14&con…

	Looking at the imported git history log of GNU findutils (which has xargs), the
	very first commit already had the -0 and -P options on Sun Feb 4 20:35:16 1996
	+0000.

	Tested on many systems, old and new; some notable ones:

	- OpenBSD 7.4
	- Void Linux
	- FreeBSD 12
	- NetBSD 9.3
	- HaikuOS (uses GNU tools).
	- Slackware 11
	- OpenBSD 3.8
	- NetBSD 5.1

	Some shells:
	- oksh
	- bash
	- dash
	- zsh

	During testing, some incompatibilities were found between xargs implementations
	in how they parse the fields, so the fields are passed as one argument which is
	split later on by the child program.

	Diffstat:
	M sfeed_update \| 48 +++++++++++++++++++++--------…

	1 file changed, 32 insertions(+), 16 deletions(-)
	---
	diff --git a/sfeed_update b/sfeed_update
	@@ -163,14 +163,12 @@ _feed() {
	# fetch and process a feed in parallel.
	# feed(name, feedurl, [basesiteurl], [encoding])
	feed() {
	- # wait until ${maxjobs} are finished: will stall the queue if an item
	- # is slow, but it is portable.
	- [ ${signo} -ne 0 ] && return
	- [ $((curjobs % maxjobs)) -eq 0 ] && wait
	- [ ${signo} -ne 0 ] && return
	- curjobs=$((curjobs + 1))
	-
	- _feed "$@" &
	+ # Job parameters for xargs.
	+ # Specify fields as a single parameter separated by the NUL separator.
	+ # These fields are split later by the child process, this allows xargs
	+ # with empty fields across many implementations.
	+ printf '%s\037%s\037%s\037%s\037%s\037%s\0' \
	+ "${config}" "${sfeedtmpdir}" "$1" "$2" "$3" "$4"
	}

	cleanup() {
	@@ -201,8 +199,6 @@ feeds() {
	}

	main() {
	- # job counter.
	- curjobs=0
	# signal number received for parent.
	signo=0
	# SIGINT: signal to interrupt parent.
	@@ -217,16 +213,36 @@ main() {
	touch "${sfeedtmpdir}/ok" \|\| die
	# make sure path exists.
	mkdir -p "${sfeedpath}"
	- # fetch feeds specified in config file.
	- feeds
	- # wait till all feeds are fetched (concurrently).
	- [ ${signo} -eq 0 ] && wait
	- # check error exit status indicator for parallel jobs.
	- [ -f "${sfeedtmpdir}/ok" ]
	+
	+ # print feeds for parallel processing with xargs.
	+ feeds > "${sfeedtmpdir}/jobs" \|\| die
	+ SFEED_UPDATE_CHILD="1" xargs -s 65535 -x -0 -P "${maxjobs}" -n 1 \
	+ "$(readlink -f "${argv0}")" < "${sfeedtmpdir}/jobs"
	statuscode=$?
	+
	+ # check error exit status indicator for parallel jobs.
	+ [ -f "${sfeedtmpdir}/ok" ] \|\| statuscode=1
	# on signal SIGINT and SIGTERM exit with signal number + 128.
	[ ${signo} -ne 0 ] && die $((signo+128))
	die ${statuscode}
	}

	+# process a single feed.
	+# parameters are: config, tmpdir, name, feedurl, basesiteurl, encoding
	+if [ "${SFEED_UPDATE_CHILD}" = "1" ]; then
	+ IFS="" # "\037"
	+ [ "$1" = "" ] && exit 0 # must have an argument set
	+ printf '%s\n' "$1" \| \
	+ while read -r config tmpdir name feedurl basesiteurl encoding; do
	+ # load config file, sets $config.
	+ loadconfig "${config}"
	+ sfeedtmpdir="${tmpdir}"
	+ _feed "${name}" "${feedurl}" "${basesiteurl}" "${encoding}"
	+ exit "$?"
	+ done
	+ exit 0
	+fi
	+
	+# ...else parent mode:
	+argv0="$0" # remember $0, in shells like zsh $0 is the function name.
	[ "${SFEED_UPDATE_INCLUDE}" = "1" ] \|\| main "$@"