Introduction
Introduction Statistics Contact Development Disclaimer Help
generate.sh: improve portability and performance, use randomness using a seed -…
git clone git://git.codemadness.org/chess-puzzles
Log
Files
Refs
README
LICENSE
---
commit 667fa14261d797a2c04938992a4efa5061e558a2
parent b9e10f90912e4d6c82e4a4738a2fcdbd77b0d6db
Author: Hiltjo Posthuma <[email protected]>
Date: Thu, 21 Dec 2023 18:14:27 +0100
generate.sh: improve portability and performance, use randomness using a seed
- Replace shuf with a custom shuffling using awk and sort -R.
- Use a fixed seed so the shuffling looks random but is deterministic.
This allows regenerating the same output (at least on the same machine).
- Generating the puzzles is faster: on a test machine it went from 10s to 3.5s.
- Show an error message when the CSV database file doesn't exist yet.
Diffstat:
M generate.sh | 92 +++++++++++++++++++----------…
1 file changed, 58 insertions(+), 34 deletions(-)
---
diff --git a/generate.sh b/generate.sh
@@ -1,12 +1,47 @@
#!/bin/sh
fenbin="./fen"
+db="lichess_db_puzzle.csv"
+
+if ! test -f "$db"; then
+ printf 'File "%s" not found, run `make db` to update it\n' "$db" >&2
+ exit 1
+fi
index="puzzles/index.html"
rm -rf puzzles
mkdir -p puzzles/solutions
solutions="$(mktemp)"
+seedfile="$(mktemp)"
+seed=20231221 # must be a integer value
+# seed for random sorting, makes it deterministic for the same system
+# seed must be sufficiently long.
+echo "${seed}_chess_puzzles" > "$seedfile"
+
# shuffle(file, [amount])
#
# Print `amount` distinct lines of `file` in a seeded pseudo-random order.
# When `amount` is omitted, or is larger than the number of lines in the
# file, every line is printed (shuffled).
#
# Globals read: $seed     - integer seed passed to awk srand().
#               $seedfile - file used as the byte source for `sort -R`.
# Output:       the selected lines on stdout.
# Returns:      1 when the file is unreadable or no temporary file can be
#               created, otherwise the exit status of sort.
#
# Not `local`-ised on purpose: the script targets plain /bin/sh.
shuffle() {
	f="$1"
	if ! test -r "$f"; then
		printf 'shuffle: cannot read "%s"\n' "$f" >&2
		return 1
	fi

	# count with awk instead of `wc -l` so that a final line without a
	# trailing newline is counted too, and no whitespace needs trimming.
	nlines="$(LC_ALL=C awk 'END { print NR }' "$f")"
	# without an amount select everything instead of silently nothing.
	total="${2:-$nlines}"
	results="$(mktemp)" || return 1

	# pick the line numbers to use: sampling without replacement, so exactly
	# min(total, nlines) distinct lines in the range 1..nlines are chosen.
	LC_ALL=C awk -v "seed=$seed" -v "nlines=$nlines" -v "total=$total" '
BEGIN {
	srand(seed);
	total = int(total);
	nlines = int(nlines);
	if (total > nlines)
		total = nlines;
	# NR starts at 1, so the drawn numbers must be 1-based as well.
	for (j = nlines - total + 1; j <= nlines; j++) {
		t = int(rand() * j) + 1;
		# t was already taken: j itself cannot have been drawn before.
		if (t in sel)
			sel[j] = 1;
		else
			sel[t] = 1;
	}
}
# "in" tests membership without creating an array entry for every line.
NR in sel {
	print $0;
}' "$f" > "$results"

	# now that there are fewer lines left the slow sort -R is affordable.
	sort -R --random-source "$seedfile" "$results"
	status=$?
	rm -f "$results"
	return "$status"
}
cat > "$index" <<!
<!DOCTYPE html>
@@ -38,42 +73,30 @@ footer {
!
# shuffle, some sort of order and point system based on rating of puzzle.
-db="lichess_db_puzzle.csv"
count=1
-(grep 'mateIn1' < "$db" | shuf -n 100 | sed 10q
-grep 'mateIn2' < "$db" | shuf -n 100 | sed 10q
-grep 'mateIn3' < "$db" | shuf -n 100 | sed 10q
-grep 'mateIn4' < "$db" | shuf -n 100 | sed 10q
-LC_ALL=C awk -F ',' '(" " $8 " ") ~ / mateIn5 / && int($4) < 2000 { print $0 }…
-LC_ALL=C awk -F ',' '(" " $8 " ") ~ / mateIn5 / && int($4) >= 2000 { print $0 …
-LC_ALL=C awk -F ',' '(" " $8 " ") ~ / mateIn5 / && int($4) >= 2700 { print $0 …
-) |
-LC_ALL=C awk -F ',' '
-{
- points="1 point"; # default
-}
-(" " $8 " ") ~ / mateIn2 / {
- points="2 points";
-}
-(" " $8 " ") ~ / mateIn3 / {
- points="3 points";
-}
-(" " $8 " ") ~ / mateIn4 / {
- points="4 points";
-}
-(" " $8 " ") ~ / mateIn5 / && int($4) < 2000 {
- points="5 points";
-}
-(" " $8 " ") ~ / mateIn5 / && int($4) >= 2000 {
- points="7 points";
-}
-(" " $8 " ") ~ / mateIn5 / && int($4) >= 2700 {
- points="10 points";
-}
-{
- print $0 "," points;
-}' | \
+groupsdir="$(mktemp -d)"
+test "$groupsdir" = "" && exit 1
+
+grep 'mateIn1' "$db" > "$groupsdir/matein1.csv"
+grep 'mateIn2' "$db" > "$groupsdir/matein2.csv"
+grep 'mateIn3' "$db" > "$groupsdir/matein3.csv"
+grep 'mateIn4' "$db" > "$groupsdir/matein4.csv"
+grep 'mateIn5' "$db" > "$groupsdir/matein5.csv"
+LC_ALL=C awk -F ',' 'int($4) < 2000 { print $0 }' "$groupsdir/matein5.csv" > "…
+LC_ALL=C awk -F ',' 'int($4) >= 2000 { print $0 }' "$groupsdir/matein5.csv" > …
+LC_ALL=C awk -F ',' 'int($4) >= 2700 { print $0 }' "$groupsdir/matein5.csv" > …
+
+(
+shuffle "$groupsdir/matein1.csv" 100 | sed 10q | LC_ALL=C awk '{ print $0 ",1 …
+shuffle "$groupsdir/matein2.csv" 100 | sed 10q | LC_ALL=C awk '{ print $0 ",2 …
+shuffle "$groupsdir/matein3.csv" 100 | sed 10q | LC_ALL=C awk '{ print $0 ",3 …
+shuffle "$groupsdir/matein4.csv" 100 | sed 10q | LC_ALL=C awk '{ print $0 ",4 …
+shuffle "$groupsdir/matein5_lt_2000.csv" 100 | sed 5q | LC_ALL=C awk '{ print …
+shuffle "$groupsdir/matein5_ge_2000.csv" | sed 3q | LC_ALL=C awk '{ print $0 "…
+shuffle "$groupsdir/matein5_ge_2700.csv" | sed 2q | LC_ALL=C awk '{ print $0 "…
+rm -rf "$groupsdir"
+) | \
while read -r line; do
i="$count"
fen=$(printf '%s' "$line" | cut -f 2 -d ',')
@@ -196,3 +219,4 @@ cat >> "$index" <<!
!
rm -f "$solutions"
+rm -f "$seedfile"
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.