generate.sh: improve portability and performance, use randomness using a seed -… | |
git clone git://git.codemadness.org/chess-puzzles | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 667fa14261d797a2c04938992a4efa5061e558a2 | |
parent b9e10f90912e4d6c82e4a4738a2fcdbd77b0d6db | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Thu, 21 Dec 2023 18:14:27 +0100 | |
generate.sh: improve portability and performance, use randomness using a seed | |
- Replace shuf with a custom shuffling using awk and sort -R. | |
- Use a seed to generate random output, but in a deterministic way. | |
Allows regenerating the same output (at least on the same machine). | |
- Generating the puzzles is faster, tested on a machine: 10s to 3.5s | |
- Show an error message when the CSV database file doesn't exist yet. | |
Diffstat: | |
M generate.sh | 92 +++++++++++++++++++----------… | |
1 file changed, 58 insertions(+), 34 deletions(-) | |
--- | |
diff --git a/generate.sh b/generate.sh | |
@@ -1,12 +1,47 @@ | |
#!/bin/sh | |
fenbin="./fen" | |
+db="lichess_db_puzzle.csv" | |
+ | |
+if ! test -f "$db"; then | |
+ printf 'File "%s" not found, run `make db` to update it\n' "$db" >&2 | |
+ exit 1 | |
+fi | |
index="puzzles/index.html" | |
rm -rf puzzles | |
mkdir -p puzzles/solutions | |
solutions="$(mktemp)" | |
+seedfile="$(mktemp)" | |
+seed=20231221 # must be a integer value | |
+# seed for random sorting, makes it deterministic for the same system | |
+# seed must be sufficiently long. | |
+echo "${seed}_chess_puzzles" > "$seedfile" | |
+ | |
+# shuffle(file, amount) | |
+shuffle() { | |
+ f="$1" | |
+ total="$2" | |
+ nlines="$(wc -l < "$f")" | |
+ nlines="$((nlines + 0))" | |
+ results="$(mktemp)" | |
+ | |
+# generate list of lines to use. Not perfectly random but good enough. | |
+LC_ALL=C awk -v "seed=$seed" -v "nlines=$nlines" -v "total=$total" ' | |
+BEGIN { | |
+ srand(seed); | |
+ for (i = 0; i < total; i++) | |
+ sel[int(rand() * nlines)] = 1; | |
+} | |
+sel[NR] { | |
+ print $0; | |
+}' "$f" > "$results" | |
+ | |
+ # now we have less results we can use the slow sort -R. | |
+ sort -R --random-source "$seedfile" "$results" | |
+ rm -f "$results" | |
+} | |
cat > "$index" <<! | |
<!DOCTYPE html> | |
@@ -38,42 +73,30 @@ footer { | |
! | |
# shuffle, some sort of order and point system based on rating of puzzle. | |
-db="lichess_db_puzzle.csv" | |
count=1 | |
-(grep 'mateIn1' < "$db" | shuf -n 100 | sed 10q | |
-grep 'mateIn2' < "$db" | shuf -n 100 | sed 10q | |
-grep 'mateIn3' < "$db" | shuf -n 100 | sed 10q | |
-grep 'mateIn4' < "$db" | shuf -n 100 | sed 10q | |
-LC_ALL=C awk -F ',' '(" " $8 " ") ~ / mateIn5 / && int($4) < 2000 { print $0 }… | |
-LC_ALL=C awk -F ',' '(" " $8 " ") ~ / mateIn5 / && int($4) >= 2000 { print $0 … | |
-LC_ALL=C awk -F ',' '(" " $8 " ") ~ / mateIn5 / && int($4) >= 2700 { print $0 … | |
-) | | |
-LC_ALL=C awk -F ',' ' | |
-{ | |
- points="1 point"; # default | |
-} | |
-(" " $8 " ") ~ / mateIn2 / { | |
- points="2 points"; | |
-} | |
-(" " $8 " ") ~ / mateIn3 / { | |
- points="3 points"; | |
-} | |
-(" " $8 " ") ~ / mateIn4 / { | |
- points="4 points"; | |
-} | |
-(" " $8 " ") ~ / mateIn5 / && int($4) < 2000 { | |
- points="5 points"; | |
-} | |
-(" " $8 " ") ~ / mateIn5 / && int($4) >= 2000 { | |
- points="7 points"; | |
-} | |
-(" " $8 " ") ~ / mateIn5 / && int($4) >= 2700 { | |
- points="10 points"; | |
-} | |
-{ | |
- print $0 "," points; | |
-}' | \ | |
+groupsdir="$(mktemp -d)" | |
+test "$groupsdir" = "" && exit 1 | |
+ | |
+grep 'mateIn1' "$db" > "$groupsdir/matein1.csv" | |
+grep 'mateIn2' "$db" > "$groupsdir/matein2.csv" | |
+grep 'mateIn3' "$db" > "$groupsdir/matein3.csv" | |
+grep 'mateIn4' "$db" > "$groupsdir/matein4.csv" | |
+grep 'mateIn5' "$db" > "$groupsdir/matein5.csv" | |
+LC_ALL=C awk -F ',' 'int($4) < 2000 { print $0 }' "$groupsdir/matein5.csv" > "… | |
+LC_ALL=C awk -F ',' 'int($4) >= 2000 { print $0 }' "$groupsdir/matein5.csv" > … | |
+LC_ALL=C awk -F ',' 'int($4) >= 2700 { print $0 }' "$groupsdir/matein5.csv" > … | |
+ | |
+( | |
+shuffle "$groupsdir/matein1.csv" 100 | sed 10q | LC_ALL=C awk '{ print $0 ",1 … | |
+shuffle "$groupsdir/matein2.csv" 100 | sed 10q | LC_ALL=C awk '{ print $0 ",2 … | |
+shuffle "$groupsdir/matein3.csv" 100 | sed 10q | LC_ALL=C awk '{ print $0 ",3 … | |
+shuffle "$groupsdir/matein4.csv" 100 | sed 10q | LC_ALL=C awk '{ print $0 ",4 … | |
+shuffle "$groupsdir/matein5_lt_2000.csv" 100 | sed 5q | LC_ALL=C awk '{ print … | |
+shuffle "$groupsdir/matein5_ge_2000.csv" | sed 3q | LC_ALL=C awk '{ print $0 "… | |
+shuffle "$groupsdir/matein5_ge_2700.csv" | sed 2q | LC_ALL=C awk '{ print $0 "… | |
+rm -rf "$groupsdir" | |
+) | \ | |
while read -r line; do | |
i="$count" | |
fen=$(printf '%s' "$line" | cut -f 2 -d ',') | |
@@ -196,3 +219,4 @@ cat >> "$index" <<! | |
! | |
rm -f "$solutions" | |
+rm -f "$seedfile" |