GopherProxy

	generate.sh: improve portability and performance, use randomness using a seed -…
	git clone git://git.codemadness.org/chess-puzzles
	Log
	Files
	Refs
	README
	LICENSE
	---
	commit 667fa14261d797a2c04938992a4efa5061e558a2
	parent b9e10f90912e4d6c82e4a4738a2fcdbd77b0d6db
	Author: Hiltjo Posthuma <[email protected]>
	Date: Thu, 21 Dec 2023 18:14:27 +0100

	generate.sh: improve portability and performance, use randomness using a seed

	- Replace shuf with a custom shuffling using awk and sort -R.
	- Use a fixed seed so the selection is random, but deterministic.
	This allows regenerating the same output (at least on the same machine).
	- Generating the puzzles is faster: on a test machine it went from 10s to 3.5s.
	- Show an error message when the CSV database file doesn't exist yet.

	Diffstat:
	M generate.sh \| 92 +++++++++++++++++++----------…

	1 file changed, 58 insertions(+), 34 deletions(-)
	---
	diff --git a/generate.sh b/generate.sh
	@@ -1,12 +1,47 @@
	#!/bin/sh

	fenbin="./fen"
	+db="lichess_db_puzzle.csv"
	+
	+if ! test -f "$db"; then
	+ printf 'File "%s" not found, run `make db` to update it\n' "$db" >&2
	+ exit 1
	+fi

	index="puzzles/index.html"
	rm -rf puzzles
	mkdir -p puzzles/solutions

	solutions="$(mktemp)"
	+seedfile="$(mktemp)"
	+seed=20231221 # must be a integer value
	+# seed for random sorting, makes it deterministic for the same system
	+# seed must be sufficiently long.
	+echo "${seed}_chess_puzzles" > "$seedfile"
	+
	+# shuffle(file, amount)
	+shuffle() {
	+ f="$1"
	+ total="$2"
	+ nlines="$(wc -l < "$f")"
	+ nlines="$((nlines + 0))"
	+ results="$(mktemp)"
	+
	+# generate list of lines to use. Not perfectly random but good enough.
	+LC_ALL=C awk -v "seed=$seed" -v "nlines=$nlines" -v "total=$total" '
	+BEGIN {
	+ srand(seed);
	+ for (i = 0; i < total; i++)
	+ sel[int(rand() * nlines)] = 1;
	+}
	+sel[NR] {
	+ print $0;
	+}' "$f" > "$results"
	+
	+ # now we have less results we can use the slow sort -R.
	+ sort -R --random-source "$seedfile" "$results"
	+ rm -f "$results"
	+}

	cat > "$index" <<!
	<!DOCTYPE html>
	@@ -38,42 +73,30 @@ footer {
	!

	# shuffle, some sort of order and point system based on rating of puzzle.
	-db="lichess_db_puzzle.csv"
	count=1

	-(grep 'mateIn1' < "$db" \| shuf -n 100 \| sed 10q
	-grep 'mateIn2' < "$db" \| shuf -n 100 \| sed 10q
	-grep 'mateIn3' < "$db" \| shuf -n 100 \| sed 10q
	-grep 'mateIn4' < "$db" \| shuf -n 100 \| sed 10q
	-LC_ALL=C awk -F ',' '(" " $8 " ") ~ / mateIn5 / && int($4) < 2000 { print $0 }…
	-LC_ALL=C awk -F ',' '(" " $8 " ") ~ / mateIn5 / && int($4) >= 2000 { print $0 …
	-LC_ALL=C awk -F ',' '(" " $8 " ") ~ / mateIn5 / && int($4) >= 2700 { print $0 …
	-) \|
	-LC_ALL=C awk -F ',' '
	-{
	- points="1 point"; # default
	-}
	-(" " $8 " ") ~ / mateIn2 / {
	- points="2 points";
	-}
	-(" " $8 " ") ~ / mateIn3 / {
	- points="3 points";
	-}
	-(" " $8 " ") ~ / mateIn4 / {
	- points="4 points";
	-}
	-(" " $8 " ") ~ / mateIn5 / && int($4) < 2000 {
	- points="5 points";
	-}
	-(" " $8 " ") ~ / mateIn5 / && int($4) >= 2000 {
	- points="7 points";
	-}
	-(" " $8 " ") ~ / mateIn5 / && int($4) >= 2700 {
	- points="10 points";
	-}
	-{
	- print $0 "," points;
	-}' \| \
	+groupsdir="$(mktemp -d)"
	+test "$groupsdir" = "" && exit 1
	+
	+grep 'mateIn1' "$db" > "$groupsdir/matein1.csv"
	+grep 'mateIn2' "$db" > "$groupsdir/matein2.csv"
	+grep 'mateIn3' "$db" > "$groupsdir/matein3.csv"
	+grep 'mateIn4' "$db" > "$groupsdir/matein4.csv"
	+grep 'mateIn5' "$db" > "$groupsdir/matein5.csv"
	+LC_ALL=C awk -F ',' 'int($4) < 2000 { print $0 }' "$groupsdir/matein5.csv" > "…
	+LC_ALL=C awk -F ',' 'int($4) >= 2000 { print $0 }' "$groupsdir/matein5.csv" > …
	+LC_ALL=C awk -F ',' 'int($4) >= 2700 { print $0 }' "$groupsdir/matein5.csv" > …
	+
	+(
	+shuffle "$groupsdir/matein1.csv" 100 \| sed 10q \| LC_ALL=C awk '{ print $0 ",1 …
	+shuffle "$groupsdir/matein2.csv" 100 \| sed 10q \| LC_ALL=C awk '{ print $0 ",2 …
	+shuffle "$groupsdir/matein3.csv" 100 \| sed 10q \| LC_ALL=C awk '{ print $0 ",3 …
	+shuffle "$groupsdir/matein4.csv" 100 \| sed 10q \| LC_ALL=C awk '{ print $0 ",4 …
	+shuffle "$groupsdir/matein5_lt_2000.csv" 100 \| sed 5q \| LC_ALL=C awk '{ print …
	+shuffle "$groupsdir/matein5_ge_2000.csv" \| sed 3q \| LC_ALL=C awk '{ print $0 "…
	+shuffle "$groupsdir/matein5_ge_2700.csv" \| sed 2q \| LC_ALL=C awk '{ print $0 "…
	+rm -rf "$groupsdir"
	+) \| \
	while read -r line; do
	i="$count"
	fen=$(printf '%s' "$line" \| cut -f 2 -d ',')
	@@ -196,3 +219,4 @@ cat >> "$index" <<!
	!

	rm -f "$solutions"
	+rm -f "$seedfile"