Add gopher hackathon. - brcon2024-hackathons - Bitreichcon 2024 Hackathons | |
git clone git://bitreich.org/brcon2024-hackathons git://enlrupgkhuxnvlhsf6lc3fz… | |
Log | |
Files | |
Refs | |
Tags | |
Submodules | |
--- | |
commit ead4dcfa2976dac07e8fe03d23fa19424922b771 | |
parent 14f1a3a9baed42894753c5378cd2b3eb7ead1d4d | |
Author: Christoph Lohmann <[email protected]> | |
Date: Fri, 2 Aug 2024 15:45:14 +0200 | |
Add gopher hackathon. | |
Diffstat: | |
A gopher/README.md | 28 ++++++++++++++++++++++++++++ | |
A gopher/gopher-clustering/ga.c | 206 +++++++++++++++++++++++++++++… | |
A gopher/gopher-clustering/gopher-cl… | 2219 +++++++++++++++++++++++++++… | |
A gopher/gopher-clustering/gopher-cl… | 1165 +++++++++++++++++++++++++++… | |
A gopher/gopher-clustering/hint.md | 9 +++++++++ | |
5 files changed, 3627 insertions(+), 0 deletions(-) | |
--- | |
diff --git a/gopher/README.md b/gopher/README.md | |
@@ -0,0 +1,28 @@ | |
+# Gopher Hackathon | |
+ | |
+## GopherVR | |
+ | |
+Recreate GopherVR with modern methods: | |
+ | |
+ https://github.com/michael-lazar/gopherVR | |
+ https://github.com/michael-lazar/gopherVR-models | |
+ | |
+Here is more modern exports: | |
+ | |
+ https://www.khronos.org/gltf/ | |
+ | |
+Just plumb gltf files into some usable game engine? | |
+ | |
+ https://godotengine.org/article/we-should-all-use-gltf-20-export-3d-as… | |
+ | |
+## Gopher Clustering | |
+ | |
+In gopher-clustering/ you find ideas by pazz0 for how to scan gopher | |
+menus and allow some clustering to topics. | |
+ | |
+How can this be optimized and made a common algorithm? | |
+ | |
+This will help further gopher automated development and new UIs, like | |
+GopherVR. | |
+ | |
+ | |
diff --git a/gopher/gopher-clustering/ga.c b/gopher/gopher-clustering/ga.c | |
@@ -0,0 +1,206 @@ | |
+#include <stdint.h> | |
+#include <stdio.h> | |
+#include <stddef.h> | |
+#include <stdlib.h> | |
+#include <string.h> | |
+ | |
+struct machine { | |
+ uint8_t a; | |
+ | |
+ uint8_t loads; | |
+ uint8_t adds; | |
+ uint8_t subs; | |
+ uint8_t times; | |
+ uint8_t noops; | |
+ | |
+ uint8_t instr; | |
+}; | |
+ | |
+enum { | |
+ LOAD, | |
+ ADD, | |
+ SUB, | |
+ TIMES, | |
+ END, | |
+}; | |
+ | |
+#define CODE_LENGTH 16 | |
+#define POOL_LENGTH 16 | |
+struct genome { | |
+ uint8_t code[CODE_LENGTH + 1]; /* +1 sentinal (0) */ | |
+ uint16_t fitness; | |
+} pool[POOL_LENGTH]; | |
+ | |
+uint8_t newcode[CODE_LENGTH]; | |
+ | |
+void | |
+execute(struct machine *m, size_t l, uint8_t code[]) | |
+{ | |
+ size_t ip; | |
+ int16_t c; | |
+ | |
+ for (ip = 0; ip < l;) { | |
+ switch (code[ip++] & 0x7) { | |
+ case LOAD: | |
+ m->loads++; | |
+ m->a = code[ip++]; | |
+ break; | |
+ case ADD: | |
+ m->adds++; | |
+ c = (int16_t)m->a + code[ip++]; | |
+ if (c > 255) | |
+ m->a = 0; | |
+ else | |
+ m->a = c; | |
+ break; | |
+ case SUB: | |
+ m->subs++; | |
+ c = (int16_t)m->a - code[ip++]; | |
+ if (c < 0) | |
+ m->a = 0; | |
+ else | |
+ m->a = c; | |
+ break; | |
+ case TIMES: | |
+ m->times++; | |
+ m->a = (uint16_t)m->a * code[ip++] / 255; | |
+ break; | |
+ case END: | |
+ m->instr++; | |
+ return; | |
+ break; | |
+ default: | |
+ m->noops++; | |
+ break; | |
+ } | |
+ m->instr++; | |
+ } | |
+} | |
+ | |
+void | |
+mutate(uint8_t code[], size_t l) | |
+{ | |
+ uint8_t i, bi, j; | |
+ | |
+ for (j = 0; j < 4; j++) { | |
+ if (rand() % 100 >= 50) | |
+ continue; | |
+ | |
+ i = rand() % CODE_LENGTH; | |
+ bi = rand() % 8; | |
+ | |
+ code[i] ^= 1 << bi; | |
+ } | |
+} | |
+ | |
+#define min(a, b) (((a) < (b)) ? (a) : (b)) | |
+ | |
+void | |
+cross(uint8_t a[], size_t al, uint8_t b[], size_t bl, uint8_t c[], size_t cl) | |
+{ | |
+ uint8_t i, si, sl; | |
+ uint8_t *one, *two; | |
+ | |
+ if (rand() % 2) { | |
+ one = a; | |
+ two = b; | |
+ } else { | |
+ one = b; | |
+ two = a; | |
+ } | |
+ | |
+ si = rand() % (min(al, bl) + 1); | |
+ sl = rand() % (min(al, bl) - si + 1); | |
+ | |
+ for (i = 0; i < si; i++) | |
+ c[i] = one[i]; | |
+ | |
+ for (; i < si + sl; i++) | |
+ c[i] = two[i]; | |
+ | |
+ for (; i < cl; i++) | |
+ c[i] = one[i]; | |
+} | |
+ | |
+void | |
+cross2(uint8_t a[], size_t al, uint8_t b[], size_t bl, uint8_t c[], size_t cl) | |
+{ | |
+ uint8_t i; | |
+ | |
+ for (i = 0; i < al; i++) | |
+ c[i] = a[i] + b[i]; | |
+} | |
+ | |
+void | |
+printcode(uint8_t code[], size_t l) | |
+{ | |
+ uint8_t i; | |
+ | |
+ for (i = 0; i < l;) { | |
+ switch (code[i++] & 0x7) { | |
+ case LOAD: | |
+ printf("LOAD %d ", code[i++]); | |
+ break; | |
+ case ADD: | |
+ printf("ADD %d ", code[i++]); | |
+ break; | |
+ case SUB: | |
+ printf("SUB %d ", code[i++]); | |
+ break; | |
+ case TIMES: | |
+ printf("TIMES %d ", code[i++]); | |
+ break; | |
+ case END: | |
+ printf("END "); | |
+ return; | |
+ break; | |
+ default: | |
+ printf("NOOP "); | |
+ break; | |
+ } | |
+ } | |
+} | |
+ | |
+int | |
+main(void) | |
+{ | |
+ struct machine m; | |
+ struct genome *g1, *g2, *w1; | |
+ size_t i, j; | |
+ | |
+ for (i = 0; i < POOL_LENGTH; i++) { | |
+ for (j = 0; j < CODE_LENGTH; j++) | |
+ pool[i].code[j] = rand(); | |
+ pool[i].code[j] = 0; | |
+ } | |
+ | |
+ do { | |
+ g1 = g2 = w1 = NULL; | |
+ | |
+ for (i = 0; i < POOL_LENGTH; i++) { | |
+ memset(&m, 0, sizeof(m)); | |
+ | |
+ execute(&m, CODE_LENGTH, pool[i].code); | |
+ pool[i].fitness = m.a * 255 / (m.instr + m.loads); | |
+ | |
+ if (!g1 || g1->fitness < pool[i].fitness) { | |
+ g2 = g1; | |
+ g1 = &pool[i]; | |
+ } else if (!g2 || g2->fitness < pool[i].fitness) { | |
+ w1 = g2; | |
+ g2 = &pool[i]; | |
+ } else if (!w1 || w1->fitness > pool[i].fitness) { | |
+ w1 = &pool[i]; | |
+ } | |
+ } | |
+ | |
+ printf("g1->fitness = %d, g2->fitness = %d, w1->fitness = %d\n… | |
+ cross(g1->code, CODE_LENGTH, g2->code, CODE_LENGTH, newcode, C… | |
+ mutate(newcode, CODE_LENGTH); | |
+ | |
+ printcode(newcode, CODE_LENGTH); | |
+ puts(""); | |
+ | |
+ memcpy(w1->code, newcode, CODE_LENGTH); | |
+ } while (1); | |
+} | |
diff --git a/gopher/gopher-clustering/gopher-clustering.c b/gopher/gopher-clust… | |
@@ -0,0 +1,2219 @@ | |
+#define _POSIX_C_SOURCE 200112L | |
+ | |
+#include <assert.h> | |
+#include <stdarg.h> | |
+#include <stdint.h> | |
+#include <stdio.h> | |
+#include <stdlib.h> | |
+#include <string.h> | |
+ | |
+#include <math.h> | |
+ | |
+#include <netdb.h> | |
+#include <sys/socket.h> | |
+#include <sys/types.h> | |
+#include <termios.h> | |
+#include <unistd.h> | |
+ | |
+uint32_t | |
+fnv1a(int n,...) | |
+{ | |
+ int i; | |
+ char *s; | |
+ va_list l; | |
+ uint32_t h; | |
+ | |
+ h = 0x811c9dc5; | |
+ | |
+ va_start(l, n); | |
+ for (i = 0; i < n; i++) { | |
+ for (s = va_arg(l, char*); *s; s++) { | |
+ h ^= *s; | |
+ h *= 0x01000193; | |
+ } | |
+ } | |
+ va_end(l); | |
+ | |
+ return h; | |
+} | |
+ | |
+uint32_t | |
+xorshift(uint32_t *s) | |
+{ | |
+ *s ^= *s << 13; | |
+ *s ^= *s >> 17; | |
+ *s ^= *s << 5; | |
+ return *s; | |
+} | |
+ | |
+/* | |
+ https://github.com/skeeto/hash-prospector/issues/19#issuecomment-11201… | |
+*/ | |
+uint32_t | |
+mix(uint32_t x) | |
+{ | |
+ x ^= x >> 16; | |
+ x *= 0x21f0aaad; | |
+ x ^= x >> 15; | |
+ x *= 0x735a2d97; | |
+ x ^= x >> 16; | |
+ return x; | |
+} | |
+ | |
+struct cell { | |
+ void *data; | |
+ char c; | |
+}; | |
+ | |
+#define MAPHEIGHT (50) | |
+#define MAPWIDTH (160) | |
+struct cell map[MAPHEIGHT][MAPWIDTH]; | |
+ | |
+struct gopherentry { | |
+ struct gopherentry *next; | |
+ char type; | |
+ char *fields[4]; | |
+ size_t i, s; | |
+}; | |
+ | |
+char * | |
+gopher_fieldend(char *s) | |
+{ | |
+ for (; *s; s++) { | |
+ if (*s == '\t') | |
+ break; | |
+ if (*s == '\r' && *(s+1) == '\n') | |
+ break; | |
+ if (*s == '\n') | |
+ break; | |
+ } | |
+ return s; | |
+} | |
+ | |
+char * | |
+gopher_nextentry(char *s) | |
+{ | |
+ char *c; | |
+ | |
+ if (c = strchr(s, '\n')) | |
+ return c + 1; | |
+ return NULL; | |
+} | |
+ | |
+void * | |
+xmalloc(size_t s) | |
+{ | |
+ void *p; | |
+ | |
+ if (!(p = malloc(s))) { | |
+ perror(NULL); | |
+ exit(1); | |
+ } | |
+ return p; | |
+} | |
+ | |
+void * | |
+xrealloc(void *p, size_t s) | |
+{ | |
+ if (!(p = realloc(p, s))) { | |
+ perror(NULL); | |
+ exit(1); | |
+ } | |
+ return p; | |
+} | |
+ | |
+/* Allow a lot of ugly things... */ | |
+size_t | |
+gopher_parsemenu(char *d, struct gopherentry **g) | |
+{ | |
+ ssize_t gl; | |
+ size_t fl, i, s; | |
+ char *c, *p, pt; | |
+ struct gopherentry *cg; | |
+ | |
+ gl = 0; | |
+ cg = NULL; | |
+ | |
+ s = 0; | |
+ pt = 0; | |
+ for (c = d; c && *c; c = gopher_nextentry(c)) { | |
+ if (*c == '.') | |
+ break; | |
+ if (*c == '\n') | |
+ continue; | |
+ | |
+ gl++; | |
+ cg = xrealloc(cg, gl * sizeof(struct gopherentry)); | |
+ memset(&cg[gl-1], 0, sizeof(struct gopherentry)); | |
+ | |
+ if (*c != 'i' && pt == 'i') | |
+ s++; | |
+ pt = *c; | |
+ | |
+ cg[gl-1].type = *c; | |
+ cg[gl-1].i = gl-1; | |
+ cg[gl-1].s = s; | |
+ c++; | |
+ | |
+ for (i = 0; i < 4; i++) { | |
+ p = gopher_fieldend(c); | |
+ fl = p - c; | |
+ cg[gl-1].fields[i] = xmalloc(fl + 1); | |
+ memcpy(cg[gl-1].fields[i], c, fl); | |
+ cg[gl-1].fields[i][fl] = '\0'; | |
+ if (*p != '\t') | |
+ break; | |
+ c = p + 1; | |
+ } | |
+ } | |
+ s++; | |
+ *g = cg; | |
+ return gl; | |
+} | |
+ | |
+ssize_t | |
+gopher_removeentries(struct gopherentry *g, size_t l) | |
+{ | |
+ size_t i, j; | |
+ | |
+ for (i = 0; i < l;) { | |
+ if ((g[i].type == 'i' && g[i].type == '3') || !g[i].fields[1] … | |
+ l--; | |
+ memmove(&g[i], &g[i+1], (l - i) * sizeof(*g)); | |
+ continue; | |
+ } | |
+ for (j = 0; j < i; j++) { | |
+ if (!strcmp(g[i].fields[1], g[j].fields[1]) && | |
+ !strcmp(g[i].fields[2], g[j].fields[2]) && | |
+ !strcmp(g[i].fields[3], g[j].fields[3])) | |
+ break; | |
+ } | |
+ if (j < i) { | |
+ l--; | |
+ memmove(&g[i], &g[i+1], (l - i) * sizeof(*g)); | |
+ continue; | |
+ } | |
+ i++; | |
+ } | |
+ | |
+ return l; | |
+} | |
+ | |
+ssize_t | |
+gopher_request(char *host, char *port, char *selector, char *buffer, size_t l) | |
+{ | |
+ int fd; | |
+ struct addrinfo hints; | |
+ struct addrinfo *r, *rp; | |
+ ssize_t n, result, rn; | |
+ | |
+ memset(&hints, 0, sizeof(hints)); | |
+ hints.ai_family = AF_UNSPEC; | |
+ hints.ai_socktype = SOCK_STREAM; | |
+ | |
+ if (getaddrinfo(host, port, &hints, &r) != 0) | |
+ return -1; | |
+ | |
+ for (rp = r; rp; rp = rp->ai_next) { | |
+ if ((fd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protoc… | |
+ continue; | |
+ if (connect(fd, rp->ai_addr, rp->ai_addrlen) != -1) | |
+ break; | |
+ close(fd); | |
+ } | |
+ freeaddrinfo(r); | |
+ if (!rp) | |
+ return -1; | |
+ | |
+ result = -1; | |
+ if (write(fd, selector, strlen(selector)) != strlen(selector)) | |
+ goto cleanup; | |
+ | |
+ if (write(fd, "\r\n", 2) != 2) | |
+ goto cleanup; | |
+ | |
+ n = 0; | |
+ while (1) { | |
+ if (n + 512 > l - 1) | |
+ goto cleanup; | |
+ if ((rn = read(fd, &buffer[n], 512)) == -1) | |
+ goto cleanup; | |
+ else if (!rn) | |
+ break; | |
+ n += rn; | |
+ } | |
+ | |
+ buffer[n] = '\0'; | |
+ result = n; | |
+ | |
+cleanup: | |
+ close(fd); | |
+ | |
+ return result; | |
+} | |
+ | |
+size_t | |
+triangularnumber(size_t n) | |
+{ | |
+ return (n * n + n) / 2; | |
+} | |
+ | |
+size_t | |
+distanceindex(size_t i, size_t j) | |
+{ | |
+ size_t t; | |
+ | |
+ if (i < j) { | |
+ t = i; | |
+ i = j; | |
+ j = t; | |
+ } | |
+ | |
+ return triangularnumber(i) + j; | |
+} | |
+ | |
+/* | |
+ https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_two_… | |
+ Which references https://www.codeproject.com/Articles/13525/Fast-memor… | |
+ The code there is using the CPOL: https://www.codeproject.com/info/cpo… | |
+*/ | |
+size_t | |
+levenshteindistance(char *a, char *b) | |
+{ | |
+ size_t i, j, dc, ic, sc, r; | |
+ size_t *v0, *v1, *t; | |
+ | |
+ v0 = xmalloc((strlen(b) + 1) * sizeof(*v0)); | |
+ v1 = xmalloc((strlen(b) + 1) * sizeof(*v1)); | |
+ | |
+ for (i = 0; i <= strlen(b); i++) | |
+ v0[i] = i; | |
+ | |
+ for (i = 0; i < strlen(a); i++) { | |
+ v1[0] = i + 1; | |
+ for (j = 0; j < strlen(b); j++) { | |
+ dc = v0[j + 1] + 1; | |
+ ic = v1[j] + 1; | |
+ if (a[i] == b[j]) | |
+ sc = v0[j]; | |
+ else | |
+ sc = v0[j] + 1; | |
+ if (dc < ic && dc < sc) | |
+ v1[j + 1] = dc; | |
+ else if (ic < dc && ic < sc) | |
+ v1[j + 1] = ic; | |
+ else | |
+ v1[j + 1] = sc; | |
+ } | |
+ t = v0; | |
+ v0 = v1; | |
+ v1 = t; | |
+ } | |
+ | |
+ r = v0[strlen(b)]; | |
+ | |
+ free(v1); | |
+ free(v0); | |
+ | |
+ return r; | |
+} | |
+ | |
+/* | |
+ Remove common prefix and suffix. The pazz0distance is the sum of the l… | |
+*/ | |
+size_t | |
+pazz0distance(char *a, char *b) | |
+{ | |
+ size_t pl, sl; | |
+ size_t al, bl; | |
+ | |
+ al = strlen(a); | |
+ bl = strlen(b); | |
+ | |
+ for (pl = 0; pl < al && pl < bl && a[pl] == b[pl]; pl++); | |
+ for (sl = 0; al - sl > pl && bl - sl > pl && a[al - 1 - sl] == b[bl - … | |
+ | |
+ return al + bl - 2 * (pl + sl); | |
+} | |
+ | |
+struct point { | |
+ double dims[2]; | |
+}; | |
+ | |
+void | |
+sammon(double *distances, size_t l, uint32_t prng, struct point *points) | |
+{ | |
+ size_t i, j, k, t, tmax; | |
+ double *gnarf; | |
+ double c, lr; | |
+ double e1, e2, sum, lasterror; | |
+ struct point *newpoints; | |
+ | |
+ gnarf = calloc(triangularnumber(l), sizeof(*gnarf)); | |
+ newpoints = calloc(l, sizeof(*newpoints)); | |
+ | |
+ for (i = 0; i < l; i++) { | |
+ points[i].dims[0] = (double)xorshift(&prng) / UINT32_MAX - 0.5; | |
+ points[i].dims[1] = (double)xorshift(&prng) / UINT32_MAX - 0.5; | |
+ } | |
+ | |
+ c = 0; | |
+ for (i = 0; i < l; i++) { | |
+ gnarf[distanceindex(i, i)] = 0; | |
+ for (j = 0; j < i; j++) { | |
+ gnarf[distanceindex(i, j)] = sqrt(pow(points[i].dims[0… | |
+ c += distances[distanceindex(i, j)]; | |
+ } | |
+ } | |
+ | |
+ lasterror = 0; | |
+ for (t = 0, tmax = 1 << 12; t < tmax; t++) { | |
+ lr = 0.01; | |
+ for (i = 0; i < l; i++) { | |
+ for (j = 0; j < 2; j++) { | |
+ e1 = -2. / c; | |
+ sum = 0; | |
+ for (k = 0; k < l; k++) { | |
+ if (i == k) | |
+ continue; | |
+ sum += (distances[distanceindex(i, k)]… | |
+ } | |
+ e1 *= sum; | |
+ | |
+ e2 = -2. / c; | |
+ sum = 0; | |
+ for (k = 0; k < l; k++) { | |
+ if (i == k) | |
+ continue; | |
+ sum += 1. / (distances[distanceindex(i… | |
+ } | |
+ e2 *= sum; | |
+ | |
+ newpoints[i].dims[j] = points[i].dims[j] - lr … | |
+ } | |
+ } | |
+ | |
+ sum = 0; | |
+ for (i = 0; i < l; i++) { | |
+ for (j = 0; j < i; j++) { | |
+ gnarf[distanceindex(i, j)] = sqrt(pow(newpoint… | |
+ sum += pow(distances[distanceindex(i, j)] - gn… | |
+ } | |
+ } | |
+printf("%lf\n", sum / c); | |
+if (t > 0 && fabs(sum / c - lasterror) < 0.000000000001) | |
+ break; | |
+lasterror = sum / c; | |
+ | |
+ memcpy(points, newpoints, l * sizeof(*points)); | |
+ } | |
+ | |
+ free(newpoints); | |
+ free(gnarf); | |
+} | |
+ | |
+int | |
+dynmsc(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *a… | |
+{ | |
+ size_t i, j, n, ck; | |
+ ssize_t m1, m, m2; | |
+ size_t ti, th, huh; | |
+ size_t iter; | |
+ double dsum, d, d2, gnarf, tdsum; | |
+ struct { | |
+ size_t medoid; | |
+ size_t n; | |
+ double dsum; | |
+ } *clusters; | |
+ struct { | |
+ size_t n1, n2, n3; | |
+ double d1, d2, d3; | |
+ } *cache; | |
+ double *dS, *dS2; | |
+ double cdS, cdS2; | |
+ size_t cm, cx; | |
+ ssize_t lx; | |
+ int flag, result; | |
+ uint32_t r; | |
+ double *C; | |
+ | |
+ if (!l) | |
+ return 0; | |
+ | |
+ if (l < k) | |
+ k = l; | |
+ | |
+ clusters = calloc(k, sizeof(*clusters)); | |
+ if (!clusters) | |
+ return -1; | |
+ | |
+ result = -1; | |
+ | |
+/* | |
+ Seed the medoids randomly | |
+*/ | |
+ for (i = n = 0; i < l; i++) { | |
+ if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n) | |
+ clusters[n++].medoid = i; | |
+ if (n == k) | |
+ break; | |
+ } | |
+ | |
+ cache = calloc(l, sizeof(*cache)); | |
+ if (!cache) | |
+ goto cleanup; | |
+ | |
+ dS = calloc(k, sizeof(*dS)); | |
+ if (!dS) | |
+ goto cleanup; | |
+ | |
+ dS2 = calloc(k, sizeof(*dS2)); | |
+ if (!dS2) | |
+ goto cleanup; | |
+ | |
+while (k > 3) { | |
+ for (i = 0; i < l; i++) { | |
+ cache[i].d1 = cache[i].d2 = cache[i].d3 = INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ d = distances[distanceindex(i, clusters[j].medoid)]; | |
+ if (d < cache[i].d1) { | |
+ cache[i].d3 = cache[i].d2; | |
+ cache[i].d2 = cache[i].d1; | |
+ cache[i].d1 = d; | |
+ cache[i].n3 = cache[i].n2; | |
+ cache[i].n2 = cache[i].n1; | |
+ cache[i].n1 = j; | |
+ } else if (d < cache[i].d2) { | |
+ cache[i].d3 = cache[i].d2; | |
+ cache[i].d2 = d; | |
+ cache[i].n3 = cache[i].n2; | |
+ cache[i].n2 = j; | |
+ } else if (d < cache[i].d3) { | |
+ cache[i].d3 = d; | |
+ cache[i].n3 = j; | |
+ } | |
+ } | |
+ } | |
+ | |
+ for (i = 0; i < k; i++) { | |
+ dS[i] = 0; | |
+ for (j = 0; j < l; j++) { | |
+ if (cache[j].n1 != i) | |
+ continue; | |
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d2 / cac… | |
+ } | |
+ for (j = 0; j < l; j++) { | |
+ if (cache[j].n2 != i) | |
+ continue; | |
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d1 / cac… | |
+ } | |
+ } | |
+ | |
+ lx = -1; | |
+ for (iter = 0; iter < 100; iter++) { | |
+ cdS = 0; | |
+ for (i = 0; i < l; i++) { | |
+ if (i == lx) | |
+ goto fastermsc_finished; | |
+ | |
+ for (j = 0; j < k; j++) { | |
+ if (clusters[j].medoid == i) | |
+ goto fastmsc_nexti; | |
+ } | |
+ | |
+ memcpy(dS2, dS, k * sizeof(*dS2)); | |
+ cdS2 = 0; | |
+ for (j = 0; j < l; j++) { | |
+ d = distances[distanceindex(i, j)]; | |
+ if (d < cache[j].d1) { | |
+ cdS2 += cache[j].d1 / cache[j].d2 - d … | |
+ dS2[cache[j].n1] += d / cache[j].d1 + … | |
+ dS2[cache[j].n2] += cache[j].d1 / cach… | |
+ } else if (d < cache[j].d2) { | |
+ cdS2 += cache[j].d1 / cache[j].d2 - ca… | |
+ dS2[cache[j].n1] += cache[j].d1 / d + … | |
+ dS2[cache[j].n2] += cache[j].d1 / cach… | |
+ } else if (d < cache[j].d3) { | |
+ dS2[cache[j].n1] += cache[j].d2 / cach… | |
+ dS2[cache[j].n2] += cache[j].d1 / cach… | |
+ } | |
+ } | |
+ d = -INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ if (dS2[j] > d) { | |
+ d = dS2[j]; | |
+ m = j; | |
+ } | |
+ } | |
+ dS2[m] += cdS2; | |
+ if (dS2[m] <= 0) | |
+ continue; | |
+//printf("%lf\t%lf\n", dS2[m], cdS2); | |
+printf("%lu (%lu) <-> %lu\n", clusters[m].medoid, (size_t)m, i); | |
+ lx = i; | |
+ clusters[m].medoid = i; | |
+ for (j = 0; j < l; j++) { | |
+ if (cache[j].n1 != m && cache[j].n2 != m && ca… | |
+ continue; | |
+ cache[j].d1 = cache[j].d2 = cache[j].d3 = INFI… | |
+ for (n = 0; n < k; n++) { | |
+ d = distances[distanceindex(j, cluster… | |
+ if (d < cache[j].d1) { | |
+ cache[j].d3 = cache[j].d2; | |
+ cache[j].d2 = cache[j].d1; | |
+ cache[j].d1 = d; | |
+ cache[j].n3 = cache[j].n2; | |
+ cache[j].n2 = cache[j].n1; | |
+ cache[j].n1 = n; | |
+ } else if (d < cache[j].d2) { | |
+ cache[j].d3 = cache[j].d2; | |
+ cache[j].d2 = d; | |
+ cache[j].n3 = cache[j].n2; | |
+ cache[j].n2 = n; | |
+ } else if (d < cache[j].d3) { | |
+ cache[j].d3 = d; | |
+ cache[j].n3 = n; | |
+ } | |
+ } | |
+ } | |
+ for (j = 0; j < k; j++) { | |
+ dS[j] = 0; | |
+ for (n = 0; n < l; n++) { | |
+ if (cache[n].n1 != j) | |
+ continue; | |
+ dS[j] += cache[n].d1 / cache[n].d2 - c… | |
+ } | |
+ for (n = 0; n < l; n++) { | |
+ if (cache[n].n2 != j) | |
+ continue; | |
+ dS[j] += cache[n].d1 / cache[n].d2 - c… | |
+ } | |
+ } | |
+printf("lx = %lu\n", i); | |
+fastmsc_nexti:; | |
+ } | |
+ if (lx == -1) | |
+ break; | |
+ } | |
+fastermsc_finished:; | |
+ d = INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ if (dS2[j] < d) { | |
+ d = dS2[j]; | |
+ m = j; | |
+ } | |
+ } | |
+ if (m < k - 1) | |
+ memmove(&clusters[m], &clusters[m + 1], k - m - 1); | |
+ k--; | |
+printf("k = %lu\n", k); | |
+} | |
+ | |
+ for (i = 0; i < k; i++) | |
+ clusters[i].n = 0; | |
+ | |
+ for (i = 0; i < l; i++) { | |
+ d = INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ if (distances[distanceindex(clusters[j].medoid, i)] < … | |
+ assoc[i] = j; | |
+ d = distances[distanceindex(clusters[j].medoid… | |
+ } | |
+ } | |
+ clusters[assoc[i]].n++; | |
+ } | |
+ | |
+ dsum = 0; | |
+ for (i = 0; i < l; i++) { | |
+ d = INFINITY; | |
+ d2 = INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ if (distances[distanceindex(clusters[j].medoid, i)] < … | |
+ d2 = d; | |
+ d = distances[distanceindex(clusters[j].medoid… | |
+ } else if (distances[distanceindex(clusters[j].medoid,… | |
+ d2 = distances[distanceindex(clusters[j].medoi… | |
+ } | |
+ } | |
+ if (clusters[assoc[i]].medoid != i) | |
+ dsum += 1 - d / (d2 + 0.000000000001); | |
+ printf("%lu\t%lu\t%lf\t%lf\t%lf\t%lu\t%s\n", i, assoc[i], 1 - … | |
+ } | |
+ printf("Silhouette = %lf\n", dsum / (l - k)); | |
+ | |
+ result = k; | |
+cleanup: | |
+ free(clusters); | |
+ return result; | |
+} | |
+ | |
+int | |
+fastermsc(struct gopherentry *g, size_t l, size_t k, double *distances, size_t… | |
+{ | |
+ size_t i, j, n, ck; | |
+ ssize_t m1, m, m2; | |
+ size_t ti, th, huh; | |
+ double dsum, d, d2, gnarf, tdsum; | |
+ struct { | |
+ size_t medoid; | |
+ size_t n; | |
+ double dsum; | |
+ } *clusters; | |
+ struct { | |
+ size_t n1, n2, n3; | |
+ double d1, d2, d3; | |
+ } *cache; | |
+ double *dS, *dS2; | |
+ double cdS, cdS2; | |
+ size_t cm, cx; | |
+ ssize_t lx; | |
+ int flag, result; | |
+ uint32_t r; | |
+ double *C; | |
+ | |
+ if (!l) | |
+ return 0; | |
+ | |
+ if (l < k) | |
+ k = l; | |
+ | |
+ clusters = calloc(k, sizeof(*clusters)); | |
+ if (!clusters) | |
+ return -1; | |
+ | |
+ result = -1; | |
+ | |
+/* | |
+ Seed the medoids randomly | |
+*/ | |
+ for (i = n = 0; i < l; i++) { | |
+ if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n) | |
+ clusters[n++].medoid = i; | |
+ if (n == k) | |
+ break; | |
+ } | |
+ | |
+ cache = calloc(l, sizeof(*cache)); | |
+ if (!cache) | |
+ goto cleanup; | |
+ | |
+ dS = calloc(k, sizeof(*dS)); | |
+ if (!dS) | |
+ goto cleanup; | |
+ | |
+ dS2 = calloc(k, sizeof(*dS2)); | |
+ if (!dS2) | |
+ goto cleanup; | |
+ | |
+ for (i = 0; i < l; i++) { | |
+ cache[i].d1 = cache[i].d2 = cache[i].d3 = INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ d = distances[distanceindex(i, clusters[j].medoid)]; | |
+ if (d < cache[i].d1) { | |
+ cache[i].d3 = cache[i].d2; | |
+ cache[i].d2 = cache[i].d1; | |
+ cache[i].d1 = d; | |
+ cache[i].n3 = cache[i].n2; | |
+ cache[i].n2 = cache[i].n1; | |
+ cache[i].n1 = j; | |
+ } else if (d < cache[i].d2) { | |
+ cache[i].d3 = cache[i].d2; | |
+ cache[i].d2 = d; | |
+ cache[i].n3 = cache[i].n2; | |
+ cache[i].n2 = j; | |
+ } else if (d < cache[i].d3) { | |
+ cache[i].d3 = d; | |
+ cache[i].n3 = j; | |
+ } | |
+ } | |
+ } | |
+ | |
+ for (i = 0; i < k; i++) { | |
+ dS[i] = 0; | |
+ for (j = 0; j < l; j++) { | |
+ if (cache[j].n1 != i) | |
+ continue; | |
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d2 / cac… | |
+ } | |
+ for (j = 0; j < l; j++) { | |
+ if (cache[j].n2 != i) | |
+ continue; | |
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d1 / cac… | |
+ } | |
+ } | |
+ | |
+ lx = -1; | |
+ for (;;) { | |
+ cdS = 0; | |
+ for (i = 0; i < l; i++) { | |
+ if (i == lx) | |
+ goto fastermsc_finished; | |
+ | |
+ for (j = 0; j < k; j++) { | |
+ if (clusters[j].medoid == i) | |
+ goto fastmsc_nexti; | |
+ } | |
+ | |
+ memcpy(dS2, dS, k * sizeof(*dS2)); | |
+ cdS2 = 0; | |
+ for (j = 0; j < l; j++) { | |
+ d = distances[distanceindex(i, j)]; | |
+ if (d < cache[j].d1) { | |
+ cdS2 += cache[j].d1 / cache[j].d2 - d … | |
+ dS2[cache[j].n1] += d / cache[j].d1 + … | |
+ dS2[cache[j].n2] += cache[j].d1 / cach… | |
+ } else if (d < cache[j].d2) { | |
+ cdS2 += cache[j].d1 / cache[j].d2 - ca… | |
+ dS2[cache[j].n1] += cache[j].d1 / d + … | |
+ dS2[cache[j].n2] += cache[j].d1 / cach… | |
+ } else if (d < cache[j].d3) { | |
+ dS2[cache[j].n1] += cache[j].d2 / cach… | |
+ dS2[cache[j].n2] += cache[j].d1 / cach… | |
+ } | |
+ } | |
+ d = -INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ if (dS2[j] > d) { | |
+ d = dS2[j]; | |
+ m = j; | |
+ } | |
+ } | |
+ dS2[m] += cdS2; | |
+ if (dS2[m] <= 0) | |
+ continue; | |
+//printf("%lf\t%lf\n", dS2[m], cdS2); | |
+printf("%lu (%lu) <-> %lu\n", clusters[m].medoid, (size_t)m, i); | |
+ lx = i; | |
+ clusters[m].medoid = i; | |
+ for (j = 0; j < l; j++) { | |
+ if (cache[j].n1 != m && cache[j].n2 != m && ca… | |
+ continue; | |
+ cache[j].d1 = cache[j].d2 = cache[j].d3 = INFI… | |
+ for (n = 0; n < k; n++) { | |
+ d = distances[distanceindex(j, cluster… | |
+ if (d < cache[j].d1) { | |
+ cache[j].d3 = cache[j].d2; | |
+ cache[j].d2 = cache[j].d1; | |
+ cache[j].d1 = d; | |
+ cache[j].n3 = cache[j].n2; | |
+ cache[j].n2 = cache[j].n1; | |
+ cache[j].n1 = n; | |
+ } else if (d < cache[j].d2) { | |
+ cache[j].d3 = cache[j].d2; | |
+ cache[j].d2 = d; | |
+ cache[j].n3 = cache[j].n2; | |
+ cache[j].n2 = n; | |
+ } else if (d < cache[j].d3) { | |
+ cache[j].d3 = d; | |
+ cache[j].n3 = n; | |
+ } | |
+ } | |
+ } | |
+ for (j = 0; j < k; j++) { | |
+ dS[j] = 0; | |
+ for (n = 0; n < l; n++) { | |
+ if (cache[n].n1 != j) | |
+ continue; | |
+ dS[j] += cache[n].d1 / cache[n].d2 - c… | |
+ } | |
+ for (n = 0; n < l; n++) { | |
+ if (cache[n].n2 != j) | |
+ continue; | |
+ dS[j] += cache[n].d1 / cache[n].d2 - c… | |
+ } | |
+ } | |
+printf("lx = %lu\n", i); | |
+fastmsc_nexti:; | |
+ } | |
+ if (lx == -1) | |
+ break; | |
+ } | |
+fastermsc_finished:; | |
+ | |
+ for (i = 0; i < k; i++) | |
+ clusters[i].n = 0; | |
+ | |
+ for (i = 0; i < l; i++) { | |
+ d = INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ if (distances[distanceindex(clusters[j].medoid, i)] < … | |
+ assoc[i] = j; | |
+ d = distances[distanceindex(clusters[j].medoid… | |
+ } | |
+ } | |
+ clusters[assoc[i]].n++; | |
+ } | |
+ | |
+ dsum = 0; | |
+ for (i = 0; i < l; i++) { | |
+ d = INFINITY; | |
+ d2 = INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ if (distances[distanceindex(clusters[j].medoid, i)] < … | |
+ d2 = d; | |
+ d = distances[distanceindex(clusters[j].medoid… | |
+ } else if (distances[distanceindex(clusters[j].medoid,… | |
+ d2 = distances[distanceindex(clusters[j].medoi… | |
+ } | |
+ } | |
+ if (clusters[assoc[i]].medoid != i) | |
+ dsum += 1 - d / (d2 + 0.000000000001); | |
+ printf("%lu\t%lu\t%lf\t%lf\t%lf\t%lu\t%s\n", i, assoc[i], 1 - … | |
+ } | |
+ printf("Silhouette = %lf\n", dsum / (l - k)); | |
+ | |
+ result = k; | |
+cleanup: | |
+ free(clusters); | |
+ return result; | |
+} | |
+ | |
+int | |
+fastmsc(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *… | |
+{ | |
+ size_t i, j, n, ck; | |
+ ssize_t m1, m, m2; | |
+ size_t ti, th; | |
+ double dsum, d, d2, gnarf, tdsum; | |
+ struct { | |
+ size_t medoid; | |
+ size_t n; | |
+ double dsum; | |
+ } *clusters; | |
+ struct { | |
+ size_t n1, n2; | |
+ double d1, d2, d3; | |
+ } *cache; | |
+ double *dS, *dS2; | |
+ double cdS, cdS2; | |
+ size_t cm, cx; | |
+ int flag, result; | |
+ uint32_t r; | |
+ double *C; | |
+ | |
+ if (!l) | |
+ return 0; | |
+ | |
+ if (l < k) | |
+ k = l; | |
+ | |
+ clusters = calloc(k, sizeof(*clusters)); | |
+ if (!clusters) | |
+ return -1; | |
+ | |
+ result = -1; | |
+ | |
+/* | |
+ seed the medoids using PAM BUILD | |
+*/ | |
+ d = INFINITY; | |
+ m = -1; | |
+ for (i = 0; i < l; i++) { | |
+ dsum = 0; | |
+ for (j = 0; j < l; j++) | |
+ dsum += distances[distanceindex(i, j)]; | |
+ if (dsum < d) { | |
+ m = i; | |
+ d = dsum; | |
+ } | |
+ } | |
+ if (m == -1) | |
+ goto cleanup; | |
+ ck = 0; | |
+ clusters[ck].medoid = m; | |
+ ck++; | |
+ | |
+ C = xmalloc(l * l * sizeof(*C)); | |
+ | |
+ for (; ck < k; ck++) { | |
+ memset(C, 0, l * l * sizeof(*C)); | |
+ for (i = 0; i < l; i++) { | |
+ for (j = 0; j < ck; j++) { | |
+ if (clusters[j].medoid == i) | |
+ goto build_nexti; | |
+ } | |
+ for (j = 0; j < l; j++) { | |
+ if (i == j) | |
+ continue; | |
+ for (n = 0; n < ck; n++) { | |
+ if (clusters[n].medoid == j) | |
+ goto build_nextj; | |
+ } | |
+ d = INFINITY; | |
+ for (n = 0; n < ck; n++) { | |
+ if (distances[distanceindex(j, cluster… | |
+ d = distances[distanceindex(j,… | |
+ } | |
+ C[j * l + i] = d - distances[distanceindex(j, … | |
+ if (C[j * l + i] < 0) | |
+ C[j * l + i] = 0; | |
+build_nextj:; | |
+ } | |
+build_nexti:; | |
+ } | |
+ d = -1; | |
+ m = -1; | |
+ for (i = 0; i < l; i++) { | |
+ for (j = 0; j < ck; j++) { | |
+ if (clusters[j].medoid == i) | |
+ goto build_nexti2; | |
+ } | |
+ dsum = 0; | |
+ for (j = 0; j < l; j++) | |
+ dsum += C[j * l + i]; | |
+ if (dsum > d) { | |
+ d = dsum; | |
+ m = i; | |
+ } | |
+build_nexti2:; | |
+ } | |
+ clusters[ck].medoid = m; | |
+ } | |
+ free(C); | |
+ | |
+/* | |
+ Seed the medoids randomly | |
+ for (i = n = 0; i < l; i++) { | |
+ if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n) | |
+ clusters[n++].medoid = i; | |
+ if (n == k) | |
+ break; | |
+ } | |
+*/ | |
+ | |
+ cache = calloc(l, sizeof(*cache)); | |
+ if (!cache) | |
+ goto cleanup; | |
+ | |
+ dS = calloc(k, sizeof(*dS)); | |
+ if (!dS) | |
+ goto cleanup; | |
+ | |
+ dS2 = calloc(k, sizeof(*dS2)); | |
+ if (!dS2) | |
+ goto cleanup; | |
+ | |
+ for (;;) { | |
+ for (i = 0; i < l; i++) { | |
+ cache[i].d1 = cache[i].d2 = cache[i].d3 = INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ d = distances[distanceindex(i, clusters[j].med… | |
+ if (d < cache[i].d1) { | |
+ cache[i].d3 = cache[i].d2; | |
+ cache[i].d2 = cache[i].d1; | |
+ cache[i].d1 = d; | |
+ cache[i].n2 = cache[i].n1; | |
+ cache[i].n1 = j; | |
+ } else if (d < cache[i].d2) { | |
+ cache[i].d3 = cache[i].d2; | |
+ cache[i].d2 = d; | |
+ cache[i].n2 = j; | |
+ } else if (d < cache[i].d3) { | |
+ cache[i].d3 = d; | |
+ } | |
+ } | |
+ } | |
+ | |
+ for (i = 0; i < k; i++) { | |
+ dS[i] = 0; | |
+ for (j = 0; j < l; j++) { | |
+ if (cache[j].n1 != i) | |
+ continue; | |
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].… | |
+ } | |
+ for (j = 0; j < l; j++) { | |
+ if (cache[j].n2 != i) | |
+ continue; | |
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].… | |
+ } | |
+ } | |
+ | |
+ cdS = 0; | |
+ for (i = 0; i < l; i++) { | |
+ for (j = 0; j < k; j++) { | |
+ if (clusters[j].medoid == i) | |
+ goto fastmsc_nexti; | |
+ } | |
+ memcpy(dS2, dS, k * sizeof(*dS2)); | |
+ cdS2 = 0; | |
+ for (j = 0; j < l; j++) { | |
+ d = distances[distanceindex(i, j)]; | |
+ if (d < cache[j].d1) { | |
+ cdS2 += cache[j].d1 / cache[j].d2 - d … | |
+ dS2[cache[j].n1] += d / cache[j].d1 + … | |
+ dS2[cache[j].n2] += cache[j].d1 / cach… | |
+ } else if (d < cache[j].d2) { | |
+ cdS2 += cache[j].d1 / cache[j].d2 - ca… | |
+ dS2[cache[j].n1] += cache[j].d1 / d + … | |
+ dS2[cache[j].n2] += cache[j].d1 / cach… | |
+ } else if (d < cache[j].d3) { | |
+ dS2[cache[j].n1] += cache[j].d2 / cach… | |
+ dS2[cache[j].n2] += cache[j].d1 / cach… | |
+ } | |
+ } | |
+ d = -INFINITY; | |
+ d2 = INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ if (dS2[j] > d) { | |
+ d = dS2[j]; | |
+ m = j; | |
+ } | |
+ if (dS2[j] < d2) { | |
+ d2 = dS2[j]; | |
+ m2 = j; | |
+ } | |
+ } | |
+printf("%lf (%ld)\t%lf (%ld)\n", d, m, d2, m2); | |
+ dS2[m] += cdS2; | |
+ if (dS2[m] > cdS) { | |
+ cdS = dS2[m]; | |
+ cm = m; | |
+ cx = i; | |
+ } | |
+fastmsc_nexti:; | |
+ } | |
+ if (cdS <= 0) | |
+ break; | |
+ clusters[cm].medoid = cx; | |
+ } | |
+ | |
+ for (i = 0; i < k; i++) | |
+ clusters[i].n = 0; | |
+ | |
+ for (i = 0; i < l; i++) { | |
+ d = INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ if (distances[distanceindex(clusters[j].medoid, i)] < … | |
+ assoc[i] = j; | |
+ d = distances[distanceindex(clusters[j].medoid… | |
+ } | |
+ } | |
+ clusters[assoc[i]].n++; | |
+ } | |
+ | |
+ dsum = 0; | |
+ for (i = 0; i < l; i++) { | |
+ d = INFINITY; | |
+ d2 = INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ if (distances[distanceindex(clusters[j].medoid, i)] < … | |
+ d2 = d; | |
+ d = distances[distanceindex(clusters[j].medoid… | |
+ } else if (distances[distanceindex(clusters[j].medoid,… | |
+ d2 = distances[distanceindex(clusters[j].medoi… | |
+ } | |
+ } | |
+ if (clusters[assoc[i]].medoid != i) | |
+ dsum += 1 - d / (d2 + 0.000000000001); | |
+ printf("%lu\t%lu\t%lf\t%lf\t%lf\t%lu\t%s\n", i, assoc[i], 1 - … | |
+ } | |
+ printf("Silhouette = %lf\n", dsum / (l - k)); | |
+ | |
+ result = k; | |
+cleanup: | |
+ free(clusters); | |
+ return result; | |
+} | |
+ | |
+/* | |
+ Seeding of medoids based on the PAM BUILD description in https://www.c… | |
+ I was too stupid to understand other descriptions. | |
+*/ | |
+int | |
+kmedoids(struct gopherentry *g, size_t l, size_t k, double *distances, size_t … | |
+{ | |
+ size_t i, j, n, ck; | |
+ ssize_t m1, m; | |
+ size_t ti, th; | |
+ double dsum, d, d2, gnarf, tdsum; | |
+ struct { | |
+ size_t medoid; | |
+ size_t n; | |
+ double dsum; | |
+ } *clusters; | |
+ int flag, result; | |
+ uint32_t r; | |
+ double *C; | |
+ | |
+ if (!l) | |
+ return 0; | |
+ | |
+ if (l < k) | |
+ k = l; | |
+ | |
+ clusters = calloc(k, sizeof(*clusters)); | |
+ if (!clusters) | |
+ return -1; | |
+ | |
+ result = -1; | |
+ | |
+ /* seed the medoids using PAM BUILD */ | |
+ d = INFINITY; | |
+ m = -1; | |
+ for (i = 0; i < l; i++) { | |
+ dsum = 0; | |
+ for (j = 0; j < l; j++) | |
+ dsum += distances[distanceindex(i, j)]; | |
+ if (dsum < d) { | |
+ m = i; | |
+ d = dsum; | |
+ } | |
+ } | |
+ if (m == -1) | |
+ goto cleanup; | |
+ ck = 0; | |
+ clusters[ck].medoid = m; | |
+ ck++; | |
+ | |
+ C = xmalloc(l * l * sizeof(*C)); | |
+ | |
+ for (; ck < k; ck++) { | |
+ memset(C, 0, l * l * sizeof(*C)); | |
+ for (i = 0; i < l; i++) { | |
+ for (j = 0; j < ck; j++) { | |
+ if (clusters[j].medoid == i) | |
+ goto build_nexti; | |
+ } | |
+ for (j = 0; j < l; j++) { | |
+ if (i == j) | |
+ continue; | |
+ for (n = 0; n < ck; n++) { | |
+ if (clusters[n].medoid == j) | |
+ goto build_nextj; | |
+ } | |
+ d = INFINITY; | |
+ for (n = 0; n < ck; n++) { | |
+ if (distances[distanceindex(j, cluster… | |
+ d = distances[distanceindex(j,… | |
+ } | |
+ C[j * l + i] = d - distances[distanceindex(j, … | |
+ if (C[j * l + i] < 0) | |
+ C[j * l + i] = 0; | |
+build_nextj:; | |
+ } | |
+build_nexti:; | |
+ } | |
+ d = -1; | |
+ m = -1; | |
+ for (i = 0; i < l; i++) { | |
+ for (j = 0; j < ck; j++) { | |
+ if (clusters[j].medoid == i) | |
+ goto build_nexti2; | |
+ } | |
+ dsum = 0; | |
+ for (j = 0; j < l; j++) | |
+ dsum += C[j * l + i]; | |
+ if (dsum > d) { | |
+ d = dsum; | |
+ m = i; | |
+ } | |
+build_nexti2:; | |
+ } | |
+ clusters[ck].medoid = m; | |
+ } | |
+ free(C); | |
+ | |
+/* | |
+ Seed the medoids randomly | |
+ for (i = n = 0; i < l; i++) { | |
+ if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n) | |
+ clusters[n++].medoid = i; | |
+ if (n == k) | |
+ break; | |
+ } | |
+*/ | |
+ | |
+ for (;;) { | |
+ tdsum = INFINITY; | |
+ for (i = 0; i < k; i++) { | |
+ for (j = 0; j < l; j++) { | |
+ for (n = 0; n < k; n++) { | |
+ if (j == clusters[n].medoid) | |
+ goto swap_nextj; | |
+ } | |
+ dsum = 0; | |
+ for (n = 0; n < l; n++) { | |
+ if (n == j) | |
+ continue; | |
+ for (m = 0; m < k; m++) { | |
+ if (clusters[m].medoid == n) | |
+ goto swap_nextn; | |
+ } | |
+ d = INFINITY; | |
+ d2 = INFINITY; | |
+ for (m = 0; m < k; m++) { | |
+ if (distances[distanceindex(cl… | |
+ d2 = d; | |
+ d = distances[distance… | |
+ } else if (distances[distancei… | |
+ d2 = distances[distanc… | |
+ } | |
+ } | |
+/* | |
+printf("%lf\t%lf\t%lf\t%lf\n", distances[distanceindex(clusters[i].medoid, n)]… | |
+*/ | |
+ if (distances[distanceindex(clusters[i… | |
+ if (distances[distanceindex(j,… | |
+ gnarf = 0; | |
+ else | |
+ gnarf = distances[dist… | |
+ } else if (distances[distanceindex(clu… | |
+ if (distances[distanceindex(j,… | |
+ gnarf = distances[dist… | |
+ else | |
+ gnarf = d2 - d; | |
+ } else { | |
+ continue; | |
+ } | |
+ dsum += gnarf; | |
+swap_nextn:; | |
+ } | |
+ if (dsum < tdsum) { | |
+ ti = i; | |
+ th = j; | |
+ tdsum = dsum; | |
+/* | |
+printf("%lu\t%lu\t%lf\n", clusters[ti].medoid, th, tdsum); | |
+*/ | |
+ } | |
+swap_nextj:; | |
+ } | |
+ } | |
+ if (tdsum >= 0) | |
+ break; | |
+ clusters[ti].medoid = th; | |
+ } | |
+ | |
+ for (i = 0; i < k; i++) | |
+ clusters[i].n = 0; | |
+ | |
+ for (i = 0; i < l; i++) { | |
+ d = INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ if (distances[distanceindex(clusters[j].medoid, i)] < … | |
+ assoc[i] = j; | |
+ d = distances[distanceindex(clusters[j].medoid… | |
+ } | |
+ } | |
+ clusters[assoc[i]].n++; | |
+ } | |
+ | |
+ dsum = 0; | |
+ for (i = 0; i < l; i++) { | |
+ d = INFINITY; | |
+ d2 = INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ if (distances[distanceindex(clusters[j].medoid, i)] < … | |
+ d2 = d; | |
+ d = distances[distanceindex(clusters[j].medoid… | |
+ } else if (distances[distanceindex(clusters[j].medoid,… | |
+ d2 = distances[distanceindex(clusters[j].medoi… | |
+ } | |
+ } | |
+ if (clusters[assoc[i]].medoid != i) | |
+ dsum += 1 - d / (d2 + 0.000000000001); | |
+ printf("%lu\t%lu\t%lf\t%lf\t%lf\t%lu\t%s\n", i, assoc[i], 1 - … | |
+ if (clusters[assoc[i]].medoid == i) | |
+ assoc[i] |= 128; | |
+ } | |
+ printf("Silhouette = %lf\n", dsum / (l - k)); | |
+ | |
+/* | |
+ while (1) { | |
+ flag = 0; | |
+ for (i = 0; i < l; i++) { | |
+ d = INFINITY; | |
+ m = assoc[i]; | |
+ for (j = 0; j < k; j++) { | |
+ if (distances[distanceindex(clusters[j].medoid… | |
+ d = distances[distanceindex(clusters[j… | |
+ assoc[i] = j; | |
+ } | |
+ } | |
+ if (assoc[i] != m) | |
+ flag = 1; | |
+ } | |
+ if (!flag) | |
+ break; | |
+ | |
+ for (i = 0; i < k; i++) | |
+ clusters[i].dsum = INFINITY; | |
+ | |
+ for (i = 0; i < l; i++) { | |
+ dsum = 0; | |
+ for (j = 0; j < l; j++) { | |
+ if (assoc[i] != assoc[j]) | |
+ continue; | |
+ dsum += distanceindex(i, j); | |
+ } | |
+ if (dsum < clusters[assoc[i]].dsum) { | |
+ clusters[assoc[i]].dsum = dsum; | |
+ clusters[assoc[i]].medoid = i; | |
+ } | |
+ } | |
+ } | |
+*/ | |
+ result = k; | |
+cleanup: | |
+ free(clusters); | |
+ return result; | |
+} | |
+ | |
+struct room { | |
+ struct room *p; | |
+ void *d; | |
+ size_t x, y; | |
+ size_t w, h; | |
+}; | |
+ | |
+int | |
+generaterooms_collides(size_t x1, size_t y1, size_t w1, size_t h1, | |
+ size_t x2, size_t y2, size_t w2, size_t h2, | |
+ size_t margin) | |
+{ | |
+ return !((y1 >= y2 + h2 + margin || y2 >= y1 + h1 + margin) || (x1 >= … | |
+} | |
+ | |
+int | |
+generaterooms_isfree(struct room *rs, size_t l, size_t x, size_t y, size_t w, … | |
+{ | |
+ size_t i; | |
+ | |
+ for (i = 0; i < l; i++) { | |
+ if (generaterooms_collides(rs[i].x, rs[i].y, rs[i].w, rs[i].h, | |
+ x, y, w, h, margin)) | |
+ return 0; | |
+ } | |
+ return 1; | |
+} | |
+ | |
+struct rect { | |
+ struct rect *next, *next2; | |
+ struct room *room; | |
+ size_t x1, y1; | |
+ size_t x2, y2; | |
+ size_t d; | |
+}; | |
+ | |
+struct rect * | |
+generaterooms_gnarf_randomneighbor(struct rect *x, struct rect *rs, uint32_t *… | |
+{ | |
+ struct rect *r, *result; | |
+ size_t n; | |
+ | |
+ n = 0; | |
+ result = NULL; | |
+ for (r = rs; r; r = r->next) { | |
+ if (r->y2 < x->y1 || r->y1 > x->y2 || r->x2 < x->x1 || r->x1 >… | |
+ continue; | |
+ if ((r->y2 == x->y1 || r->y1 == x->y2) && (r->x2 == x->x1 || r… | |
+ continue; | |
+ n++; | |
+ if (xorshift(prng) / (1. + UINT32_MAX) < 1. / n) | |
+ result = r; | |
+ } | |
+ | |
+ return result; | |
+} | |
+ | |
+#define ROOM_HEIGHT_MIN 3 | |
+#define ROOM_WIDTH_MIN 5 | |
+#define ROOM_MARGIN_MIN 1 | |
+#define CELL_HEIGHT_MIN (ROOM_HEIGHT_MIN + ROOM_MARGIN_MIN + 3) | |
+#define CELL_WIDTH_MIN (ROOM_WIDTH_MIN + ROOM_MARGIN_MIN + 3) | |
+size_t | |
+generaterooms_gnarf(char *host, char *port, char *selector, struct room *rs, s… | |
+{ | |
+ struct rect *queuehead, *queuetail; | |
+ struct rect *r, *t; | |
+ struct rect *rects, *walk; | |
+ size_t w, h, i, j, rl, n; | |
+ uint32_t prng; | |
+ int vertical; | |
+ struct room *room; | |
+ | |
+ prng = fnv1a(4, host, port, selector, "seed"); | |
+ | |
+ r = xmalloc(sizeof(*r)); | |
+ r->x1 = r->y1 = ROOM_MARGIN_MIN; | |
+ r->x2 = MAPWIDTH; | |
+ r->y2 = MAPHEIGHT; | |
+ r->d = 0; | |
+ | |
+ queuetail = r; | |
+ queuetail->next = NULL; | |
+ queuehead = r; | |
+ | |
+ rects = NULL; | |
+ rl = 0; | |
+ | |
+ while (queuehead) { | |
+ r = queuehead; | |
+ if (queuetail == queuehead) | |
+ queuetail = NULL; | |
+ queuehead = queuehead->next; | |
+ | |
+/* | |
+ if (r->d >= 8) { | |
+ r->next = rects; | |
+ rects = r; | |
+ rl++; | |
+ continue; | |
+ } | |
+*/ | |
+ | |
+ if (r->x2 - r->x1 >= CELL_WIDTH_MIN * 2 && r->y2 - r->y1 >= CE… | |
+ vertical = xorshift(&prng) & 1; | |
+ } else if (r->x2 - r->x1 >= CELL_WIDTH_MIN * 2) { | |
+ vertical = 0; | |
+ } else if (r->y2 - r->y1 >= CELL_HEIGHT_MIN * 2) { | |
+ vertical = 1; | |
+ } else { | |
+ r->next = rects; | |
+ rects = r; | |
+ rl++; | |
+ continue; | |
+ } | |
+ | |
+ if (vertical) { | |
+ w = r->x2 - r->x1; | |
+ h = CELL_HEIGHT_MIN + xorshift(&prng) % (1 + r->y2 - r… | |
+ } else { | |
+ w = CELL_WIDTH_MIN + xorshift(&prng) % (1 + r->x2 - r-… | |
+ h = r->y2 - r->y1; | |
+ } | |
+ | |
+ t = xmalloc(sizeof(*t)); | |
+ t->x1 = r->x1; | |
+ t->y1 = r->y1; | |
+ t->x2 = r->x1 + w; | |
+ t->y2 = r->y1 + h; | |
+ t->d = r->d + 1; | |
+ t->next = NULL; | |
+ t->room = NULL; | |
+ | |
+ if (!queuetail) { | |
+ queuehead = t; | |
+ queuetail = t; | |
+ } else { | |
+ queuetail->next = t; | |
+ queuetail = t; | |
+ } | |
+ | |
+ t = xmalloc(sizeof(*t)); | |
+ if (vertical) { | |
+ t->x1 = r->x1; | |
+ t->y1 = r->y1 + h; | |
+ } else { | |
+ t->x1 = r->x1 + w; | |
+ t->y1 = r->y1; | |
+ } | |
+ t->x2 = r->x2; | |
+ t->y2 = r->y2; | |
+ t->d = r->d + 1; | |
+ t->next = NULL; | |
+ t->room = NULL; | |
+ | |
+ queuetail->next = t; | |
+ queuetail = t; | |
+ | |
+ free(r); | |
+ } | |
+ | |
+ if (l > rl) | |
+ l = rl; | |
+ | |
+ for (r = rects; r; r = r->next) { | |
+ if (MAPHEIGHT / 2 >= r->y1 && MAPHEIGHT / 2 < r->y2 && | |
+ MAPWIDTH / 2 >= r->x1 && MAPWIDTH / 2 < r->x2) | |
+ break; | |
+ } | |
+ | |
+ i = 0; | |
+ rs[i].w = ROOM_WIDTH_MIN + xorshift(&prng) % (1 + r->x2 - r->x1 - ROOM… | |
+ rs[i].h = ROOM_HEIGHT_MIN + xorshift(&prng) % (1 + r->y2 - r->y1 - ROO… | |
+ rs[i].x = r->x1 + xorshift(&prng) % (1 + r->x2 - r->x1 - ROOM_MARGIN_M… | |
+ rs[i].y = r->y1 + xorshift(&prng) % (1 + r->y2 - r->y1 - ROOM_MARGIN_M… | |
+ rs[i].p = NULL; | |
+ r->room = &rs[i]; | |
+ | |
+ walk = r; | |
+ walk->next2 = NULL; | |
+ | |
+ i++; | |
+ for (; i < l;) { | |
+ t = generaterooms_gnarf_randomneighbor(r, rects, &prng); | |
+ if (!t || t->room) { | |
+ n = 0; | |
+ for (t = walk; t; t = t->next2) { | |
+ n++; | |
+ if (xorshift(&prng) / (1. + UINT32_MAX) < 1. /… | |
+ r = t; | |
+ | |
+ } | |
+ continue; | |
+ } | |
+ rs[i].w = ROOM_WIDTH_MIN + xorshift(&prng) % (1 + t->x2 - t->x… | |
+ rs[i].h = ROOM_HEIGHT_MIN + xorshift(&prng) % (1 + t->y2 - t->… | |
+ rs[i].x = t->x1 + xorshift(&prng) % (1 + t->x2 - t->x1 - ROOM_… | |
+ rs[i].y = t->y1 + xorshift(&prng) % (1 + t->y2 - t->y1 - ROOM_… | |
+ rs[i].p = r->room; | |
+ t->room = &rs[i]; | |
+ i++; | |
+ r = t; | |
+ r->next2 = walk; | |
+ walk = r; | |
+ } | |
+ | |
+/* | |
+ for (r = rects, i = 0; i < l; i++, r = r->next) { | |
+ rs[i].w = 3 + xorshift(&prng) % (1 + r->x2 - r->x1 - 1 - 3); | |
+ rs[i].h = 2 + xorshift(&prng) % (1 + r->y2 - r->y1 - 1 - 2); | |
+ rs[i].x = r->x1 + xorshift(&prng) % (1 + r->x2 - r->x1 - 1 - r… | |
+ rs[i].y = r->y1 + xorshift(&prng) % (1 + r->y2 - r->y1 - 1 - r… | |
+ rs[i].p = NULL; | |
+ } | |
+*/ | |
+ | |
+ return l; | |
+} | |
+ | |
+#define ROOMS_MARGIN_MIN 1 | |
+#define ROOMS_MARGIN_MAX 3 | |
+#define MAP_MARGIN 1 | |
+size_t | |
+generaterooms_stupid(char *host, char *port, char *selector, struct room *rs, … | |
+{ | |
+ size_t i, j, ri, m, n; | |
+ ssize_t y, x; | |
+ ssize_t w, h; | |
+ char buffer[3]; | |
+ struct room *r; | |
+ uint32_t prng; | |
+ | |
+ rs[0].h = 4 + fnv1a(4, host, port, selector, "h") % 5; | |
+ rs[0].w = 4 + fnv1a(4, host, port, selector, "w") % 5; | |
+ rs[0].y = MAPHEIGHT / 2 - rs[0].h / 2; | |
+ rs[0].x = MAPWIDTH / 2 - rs[0].w / 2; | |
+ for (i = 1; i < l; i++) { | |
+ snprintf(buffer, sizeof(buffer), "%u", i); | |
+ prng = fnv1a(5, host, port, selector, buffer, "seed"); | |
+ n = 0; | |
+ do { | |
+ do { | |
+ switch (xorshift(&prng) % 3) { | |
+ case 0: | |
+ h = 3 + xorshift(&prng) % 3; | |
+ w = 3 + xorshift(&prng) % 2; | |
+ break; | |
+ case 1: | |
+ w = 3 + xorshift(&prng) % 3; | |
+ h = 4 + xorshift(&prng) % 2; | |
+ break; | |
+ case 2: | |
+ h = w = 4 + xorshift(&prng) % 5; | |
+ break; | |
+ } | |
+ m = ROOMS_MARGIN_MIN + xorshift(&prng) % (1 + … | |
+ j = xorshift(&prng) % i; | |
+ switch (xorshift(&prng) % 4) { | |
+ case 0: | |
+ x = rs[j].x; | |
+ if (w < rs[j].w) | |
+ x += xorshift(&prng) % (rs[j].… | |
+ else | |
+ x -= xorshift(&prng) % (w - rs… | |
+ y = rs[j].y - h - m; | |
+ break; | |
+ case 1: | |
+ x = rs[j].x + rs[j].w + m; | |
+ y = rs[j].y; | |
+ if (h < rs[j].h) | |
+ y += xorshift(&prng) % (rs[j].… | |
+ else | |
+ y -= xorshift(&prng) % (h - rs… | |
+ break; | |
+ case 2: | |
+ x = rs[j].x; | |
+ if (w < rs[j].w) | |
+ x += xorshift(&prng) % (rs[j].… | |
+ else | |
+ x -= xorshift(&prng) % (w - rs… | |
+ y = rs[j].y + rs[j].h + m; | |
+ break; | |
+ case 3: | |
+ x = rs[j].x - w - m; | |
+ y = rs[j].y; | |
+ if (h < rs[j].h) | |
+ y += xorshift(&prng) % (rs[j].… | |
+ else | |
+ y -= xorshift(&prng) % (h - rs… | |
+ break; | |
+ } | |
+ } while (x < MAP_MARGIN || x + w + MAP_MARGIN > MAPWID… | |
+ n++; | |
+ } while (n <= 100 && !generaterooms_isfree(rs, i, x, y, w, h, … | |
+ if (n > 100) | |
+ break; | |
+ rs[i].h = h; | |
+ rs[i].w = w; | |
+ rs[i].x = x; | |
+ rs[i].y = y; | |
+ rs[i].p = &rs[j]; | |
+ } | |
+ return i; | |
+} | |
+ | |
+#define ROOMS_GRID_ROWS 5 | |
+#define ROOMS_GRID_COLS 10 | |
+size_t | |
+generaterooms_grid(char *host, char *port, char *selector, struct room *rs, si… | |
+{ | |
+ size_t i, j, k, x, y; | |
+ unsigned char cells[ROOMS_GRID_ROWS * ROOMS_GRID_COLS]; | |
+ char buffer[3]; | |
+ uint32_t prng; | |
+ | |
+ memset(cells, 0, sizeof(cells)); | |
+ | |
+ if (l > ROOMS_GRID_ROWS * ROOMS_GRID_COLS) | |
+ l = ROOMS_GRID_ROWS * ROOMS_GRID_COLS; | |
+ | |
+ for (i = 0; i < l; i++) { | |
+ snprintf(buffer, sizeof(buffer), "%u", i); | |
+ prng = fnv1a(5, host, port, selector, buffer, "seed"); | |
+ j = 0; | |
+ do { | |
+ k = xorshift(&prng) % (ROOMS_GRID_ROWS * ROOMS_GRID_CO… | |
+ j++; | |
+ } while (j <= 100 && cells[k]); | |
+ if (j > 100) | |
+ break; | |
+ y = k / ROOMS_GRID_COLS; | |
+ x = k % ROOMS_GRID_COLS; | |
+ cells[k] = 1; | |
+ rs[i].w = 3 + xorshift(&prng) % (1 + 8 - 2 - 3); | |
+ rs[i].h = 3 + xorshift(&prng) % (1 + 5 - 2 - 3); | |
+ rs[i].x = x * 8 + 1 + xorshift(&prng) % (1 + 8 - 2 - rs[i].w); | |
+ rs[i].y = y * 5 + 1 + xorshift(&prng) % (1 + 5 - 2 - rs[i].h); | |
+/* | |
+printf("%u\t%u\t%u\t%u\n", rs[i].w, rs[i].h, rs[i].x, rs[i].y); | |
+*/ | |
+ rs[i].p = NULL; | |
+ } | |
+ | |
+ return i; | |
+} | |
+ | |
+void | |
+generaterooms_grid2_(struct room *r, uint32_t *prng, size_t x, size_t y) | |
+{ | |
+ r->w = 3 + xorshift(prng) % (1 + 8 - 2 - 3); | |
+ r->h = 3 + xorshift(prng) % (1 + 5 - 2 - 3); | |
+ r->x = x * 8 + 1 + xorshift(prng) % (1 + 8 - 2 - r->w); | |
+ r->y = y * 5 + 1 + xorshift(prng) % (1 + 5 - 2 - r->h); | |
+ r->p = NULL; | |
+/* | |
+printf("%u\t%u\t%u\t%u\n", r->w, r->h, r->x, r->y); | |
+*/ | |
+} | |
+ | |
+size_t | |
+generaterooms_grid2(char *host, char *port, char *selector, struct room *rs, s… | |
+{ | |
+ const struct { | |
+ ssize_t y, x; | |
+ } gnarf[8] = { | |
+ { -1, 0 }, | |
+ { 0, 1 }, | |
+ { 1, 0 }, | |
+ { 0, -1 }, | |
+ { -1, 1 }, | |
+ { 1, 1 }, | |
+ { 1, -1 }, | |
+ { -1, -1 } | |
+ }; | |
+ size_t i, j, k, m, n; | |
+ ssize_t x, y; | |
+ struct room *cells[ROOMS_GRID_ROWS][ROOMS_GRID_COLS]; | |
+ char buffer[3]; | |
+ uint32_t prng; | |
+ struct room *r; | |
+ | |
+ memset(cells, 0, sizeof(cells)); | |
+ | |
+ if (l > ROOMS_GRID_ROWS * ROOMS_GRID_COLS) | |
+ l = ROOMS_GRID_ROWS * ROOMS_GRID_COLS; | |
+ | |
+ prng = fnv1a(5, host, port, selector, "seed"); | |
+ x = ROOMS_GRID_COLS / 2; | |
+ y = ROOMS_GRID_ROWS / 2; | |
+ | |
+ r = &rs[0]; | |
+ generaterooms_grid2_(r, &prng, x, y); | |
+ cells[y][x] = r; | |
+ | |
+ for (i = 1; i < l; i++) { | |
+ for (n = 0; n < 100; n++) { | |
+ m = xorshift(&prng) % 4; | |
+ x += gnarf[m].x; | |
+ y += gnarf[m].y; | |
+ if (x < 0 || x >= ROOMS_GRID_COLS || y < 0 || y >= ROO… | |
+ m = xorshift(&prng) % i; | |
+ r = &rs[m]; | |
+ x = r->x / 8; | |
+ y = r->y / 5; | |
+ } else { | |
+ generaterooms_grid2_(&rs[i], &prng, x, y); | |
+ rs[i].p = r; | |
+ cells[y][x] = r = &rs[i]; | |
+ break; | |
+ } | |
+ } | |
+ if (n >= 100) | |
+ break; | |
+ } | |
+ | |
+ return i; | |
+} | |
+ | |
+size_t | |
+distance(size_t x1, size_t y1, size_t x2, size_t y2) | |
+{ | |
+ size_t d; | |
+ | |
+ if (y1 < y2) | |
+ d = y2 - y1; | |
+ else | |
+ d = y1 - y2; | |
+ if (x1 < x2) | |
+ d += x2 - x1; | |
+ else | |
+ d += x1 - x2; | |
+ | |
+ return d; | |
+} | |
+ | |
+void | |
+nearestpoints(struct room *a, struct room *b, size_t *ax, size_t *ay, size_t *… | |
+{ | |
+ if (a->y >= b->y && a->y < b->y + b->h) { | |
+ *ay = *by = a->y; | |
+ } else if (b->y >= a->y && b->y < a->y + a->h) { | |
+ *ay = *by = b->y; | |
+ } else if (a->y >= b->y) { | |
+ *ay = a->y; | |
+ *by = b->y + b->h - 1; | |
+ } else if (b->y >= a->y) { | |
+ *ay = a->y + a->h - 1; | |
+ *by = b->y; | |
+ } | |
+ | |
+ if (a->x >= b->x && a->x < b->x + b->w) { | |
+ *ax = *bx = a->x; | |
+ } else if (b->x >= a->x && b->x < a->x + a->w) { | |
+ *ax = *bx = b->x; | |
+ } else if (a->x >= b->x) { | |
+ *ax = a->x; | |
+ *bx = b->x + b->w - 1; | |
+ } else if (b->x >= a->x) { | |
+ *ax = a->x + a->w - 1; | |
+ *bx = b->x; | |
+ } | |
+} | |
+ | |
+size_t | |
+roomdistance(struct room *a, struct room *b) | |
+{ | |
+ size_t x1, y1; | |
+ size_t x2, y2; | |
+ | |
+ nearestpoints(a, b, &x1, &y1, &x2, &y2); | |
+ | |
+ return distance(x1, y1, x2, y2); | |
+} | |
+ | |
+struct room * | |
+nearestroom(struct room *rs, size_t l, struct room *x, uint32_t prng) | |
+{ | |
+ size_t i, d, cd; | |
+ ssize_t j; | |
+ double n; | |
+ | |
+ j = -1; | |
+ d = UINT32_MAX; | |
+ for (i = 0; i < l; i++) { | |
+ if (&rs[i] == x) | |
+ continue; | |
+ if ((cd = roomdistance(&rs[i], x)) < d) { | |
+ j = i; | |
+ d = cd; | |
+ n = 1; | |
+ } else if (cd == d) { | |
+ n++; | |
+ if (xorshift(&prng) / (UINT32_MAX + 1.) < 1. / n) | |
+ j = i; | |
+ } | |
+ } | |
+ | |
+ if (j == -1) | |
+ return NULL; | |
+ | |
+ return &rs[j]; | |
+} | |
+ | |
+void | |
+connectrooms(struct room *a, struct room *b) | |
+{ | |
+ size_t i, j; | |
+ ssize_t ii; | |
+ size_t x1, y1; | |
+ size_t x2, y2; | |
+ | |
+ nearestpoints(a, b, &x1, &y1, &x2, &y2); | |
+ | |
+ if (y1 > y2) { | |
+ ii = -1; | |
+ } else if (y2 > y1) { | |
+ ii = 1; | |
+ } else { | |
+ ii = 0; | |
+ } | |
+ | |
+/* | |
+printf("%lu\t%lu\t%d\n", y1, y2, ii); | |
+*/ | |
+ for (i = y1; i != y2; i += ii) | |
+ map[i][x1].c = '.'; | |
+ | |
+ if (x1 > x2) { | |
+ ii = -1; | |
+ } else if (x2 > x1) { | |
+ ii = 1; | |
+ } else { | |
+ ii = 0; | |
+ } | |
+ | |
+ for (i = x1; i != x2; i += ii) | |
+ map[y2][i].c = '.'; | |
+} | |
+ | |
+void | |
+rendermap(void) | |
+{ | |
+ size_t i, j; | |
+ | |
+ for (i = 0; i < MAPHEIGHT; i++) { | |
+ for (j = 0; j < MAPWIDTH; j++) | |
+ putchar(map[i][j].c); | |
+ putchar('\n'); | |
+ } | |
+} | |
+ | |
+size_t | |
+diff(size_t a, size_t b) | |
+{ | |
+ if (a < b) | |
+ return b - a; | |
+ return a - b; | |
+} | |
+ | |
+/* | |
+ https://en.wikipedia.org/wiki/Levenshtein_distance#Recursive | |
+*/ | |
+size_t | |
+levenshteindistance_slow(char *a, char *b) | |
+{ | |
+ size_t k, l, m; | |
+ | |
+ if (!*a) | |
+ return strlen(b); | |
+ else if (!*b) | |
+ return strlen(a); | |
+ else if (*a == *b) | |
+ return levenshteindistance_slow(a + 1, b + 1); | |
+ | |
+ k = levenshteindistance_slow(a + 1, b); | |
+ l = levenshteindistance_slow(a, b + 1); | |
+ m = levenshteindistance_slow(a + 1, b + 1); | |
+ | |
+ if (k < l && k < m) | |
+ return 1 + k; | |
+ else if (l < k && l < m) | |
+ return 1 + l; | |
+ else | |
+ return 1 + m; | |
+} | |
+ | |
+ssize_t | |
+placeitems_kmedoid(char *host, char *port, char *selector, struct gopherentry … | |
+{ | |
+ size_t i, j; | |
+ ssize_t r; | |
+ double *distances, d; | |
+ double *mdists; | |
+ struct { | |
+ double selector; | |
+ double segment; | |
+ double index; | |
+ } *distanceparts, maxdistanceparts; | |
+ uint32_t prng; | |
+ struct point *points; | |
+ | |
+ distanceparts = xmalloc(triangularnumber(l) * sizeof(*distanceparts)); | |
+ | |
+ memset(&maxdistanceparts, 0, sizeof(maxdistanceparts)); | |
+ for (i = 0; i < l; i++) { | |
+ memset(&distanceparts[distanceindex(i, i)], 0, sizeof(*distanc… | |
+ for (j = 0; j < i; j++) { | |
+ distanceparts[distanceindex(i, j)].selector = levensht… | |
+ distanceparts[distanceindex(i, j)].segment = diff(g[i]… | |
+ distanceparts[distanceindex(i, j)].index = diff(g[i].i… | |
+ | |
+ if (distanceparts[distanceindex(i, j)].selector > maxd… | |
+ maxdistanceparts.selector = distanceparts[dist… | |
+ if (distanceparts[distanceindex(i, j)].segment > maxdi… | |
+ maxdistanceparts.segment = distanceparts[dista… | |
+ if (distanceparts[distanceindex(i, j)].index > maxdist… | |
+ maxdistanceparts.index = distanceparts[distanc… | |
+ } | |
+ } | |
+ for (i = 0; i < l; i++) { | |
+ for (j = 0; j < i; j++) { | |
+ distanceparts[distanceindex(i, j)].selector /= maxdist… | |
+ distanceparts[distanceindex(i, j)].segment /= maxdista… | |
+ distanceparts[distanceindex(i, j)].index /= maxdistanc… | |
+ } | |
+ } | |
+ | |
+ distances = xmalloc(triangularnumber(l) * sizeof(*distances)); | |
+ prng = fnv1a(4, host, port, selector, "wiggle"); | |
+ for (i = 0; i < l; i++) { | |
+ distances[distanceindex(i, i)] = 0; | |
+ for (j = 0; j < i; j++) | |
+ distances[distanceindex(i, j)] = + distanceparts[dista… | |
+ } | |
+ free(distanceparts); | |
+ | |
+ r = kmedoids(g, l, k, distances, assocs, fnv1a(4, host, port, selector… | |
+ | |
+ points = calloc(l, sizeof(*points)); | |
+ sammon(distances, l, fnv1a(4, host, port, selector, "sammonseed"), poi… | |
+ | |
+ for (i = 0; i < l; i++) | |
+ printf("%lf\t%lf\t%ld\t%lu\t%s\n", points[i].dims[0], points[i… | |
+ | |
+ free(points); | |
+ free(distances); | |
+ | |
+ return r; | |
+} | |
+ | |
+size_t | |
+placeitems_hash(char *host, char *port, char *selector, struct gopherentry *g,… | |
+{ | |
+ size_t i; | |
+ | |
+ for (i = 0; i < l; i++) | |
+ assocs[i] = fnv1a(6, host, port, selector, g[i].fields[1], g[i… | |
+ | |
+ return k; | |
+} | |
+ | |
+size_t py, px; | |
+ | |
+enum { | |
+ Portal, | |
+ StaircaseDown, | |
+ Bookshelf | |
+}; | |
+ | |
+void | |
+generatemap(char *host, char *port, char *selector, struct gopherentry *g, siz… | |
+{ | |
+ size_t i, j, k; | |
+ size_t x, y; | |
+ ssize_t n, m; | |
+ size_t *cassocs; | |
+ struct room *rooms, *r; | |
+ struct { | |
+ unsigned char x, y; | |
+ } positions[3]; | |
+ uint32_t prng; | |
+ char buffer[3]; | |
+ | |
+ memset(map, 0, sizeof(map)); | |
+ | |
+ for (i = 0; i < MAPHEIGHT; i++) { | |
+ for (j = 0; j < MAPWIDTH; j++) { | |
+ map[i][j].c = '#'; | |
+ } | |
+ } | |
+ k = 7; | |
+ rooms = calloc(k, sizeof(*rooms)); | |
+ if (!rooms) | |
+ return; | |
+ k = generaterooms_gnarf(host, port, selector, rooms, k); | |
+ | |
+ cassocs = calloc(l, sizeof(*cassocs)); | |
+ if (!cassocs) | |
+ goto cleanup; | |
+ | |
+ k = placeitems_kmedoid(host, port, selector, g, l, cassocs, k); | |
+ | |
+ /* Insert rooms */ | |
+ for (i = 0; i < k; i++) { | |
+ for (y = rooms[i].y; y < rooms[i].y + rooms[i].h; y++) { | |
+ for (x = rooms[i].x; x < rooms[i].x + rooms[i].w; x++) | |
+ map[y][x].c = '.'; | |
+ } | |
+ } | |
+ | |
+ /* Insert connections */ | |
+ for (i = 0; i < k; i++) { | |
+ if (rooms[i].p) | |
+ connectrooms(&rooms[i], rooms[i].p); | |
+/* | |
+ if ((r = nearestroom(rooms, i, &rooms[i], fnv1a(4, host, port,… | |
+ connectrooms(&rooms[i], r); | |
+*/ | |
+ } | |
+ | |
+ /* Insert items */ | |
+ for (i = 0; i < k; i++) { | |
+ snprintf(buffer, sizeof(buffer), "%d", i); | |
+ prng = fnv1a(4, host, port, selector, buffer); | |
+ for (j = 0; j < 3; j++) { | |
+ /* Ugly brute force strategy... I'm sure there is a ma… | |
+ do { | |
+ positions[j].x = rooms[i].x + xorshift(&prng) … | |
+ positions[j].y = rooms[i].y + xorshift(&prng) … | |
+ for (m = 0; m < j && positions[m].x != positio… | |
+ positions[m].y != positio… | |
+ } while (m < j); | |
+ } | |
+ for (j = 0; j < l; j++) { | |
+ if (cassocs[j] != i) | |
+ continue; | |
+/* | |
+ printf("%s\t%lu\n", g[j].fields[1], cassocs[j]); | |
+*/ | |
+ switch (g[j].type) { | |
+ case '0': | |
+ x = positions[Bookshelf].x; | |
+ y = positions[Bookshelf].y; | |
+ map[y][x].c = 'E'; | |
+ break; | |
+ case '1': | |
+ if (strcmp(g[j].fields[2], host) || strcmp(g[j… | |
+ x = positions[Portal].x; | |
+ y = positions[Portal].y; | |
+ map[y][x].c = 'O'; | |
+ } else { | |
+ x = positions[StaircaseDown].x; | |
+ y = positions[StaircaseDown].y; | |
+ if (map[y][x].data) | |
+ map[y][x].c = 'L'; | |
+ else | |
+ map[y][x].c = '>'; | |
+ } | |
+ break; | |
+ } | |
+ g[j].next = map[y][x].data; | |
+ map[y][x].data = &g[j]; | |
+ } | |
+ } | |
+ | |
+ free(cassocs); | |
+cleanup: | |
+ free(rooms); | |
+} | |
+ | |
+char gopherbuffer[16 * 1024 * 1024]; | |
+ | |
+void | |
+interact(void) | |
+{ | |
+} | |
+ | |
+void | |
+loop(void) | |
+{ | |
+ char c; | |
+ | |
+ do { | |
+ switch (c) { | |
+ case 'h': | |
+ break; | |
+ case 'j': | |
+ break; | |
+ case 'k': | |
+ break; | |
+ case 'l': | |
+ break; | |
+ case ' ': | |
+ interact(); | |
+ break; | |
+ } | |
+ } while (1); | |
+} | |
+ | |
+int | |
+main(int argc, char *argv[]) | |
+{ | |
+ ssize_t l; | |
+ struct gopherentry *g; | |
+ char *host, *port, *selector; | |
+ struct termios termios, ntermios; | |
+ | |
+ if (argc < 2) { | |
+ printf("%s host [port [selector]]\n", argv[0]); | |
+ return 1; | |
+ } | |
+ | |
+/* | |
+ memset(&termios, 0, sizeof(termios)); | |
+ if (tcgetattr(0, &termios) == -1) | |
+ return 1; | |
+ | |
+ memcpy(&ntermios, &termios, sizeof(termios)); | |
+ ntermios.c_lflag &= ~(ICANON | ECHO); | |
+ ntermios.c_cc[VMIN] = 1; | |
+ ntermios.c_cc[VTIME] = 0; | |
+ | |
+ if (tcsetattr(0, TCSANOW, &ntermios) == -1) | |
+ return 1; | |
+*/ | |
+ | |
+ host = argv[1]; | |
+ port = (argc > 2) ? argv[2] : "70"; | |
+ selector = (argc > 3) ? argv[3] : ""; | |
+ | |
+ do { | |
+ l = gopher_request(host, port, selector, gopherbuffer, sizeof(… | |
+ if (l == -1) | |
+ return 1; | |
+ | |
+ l = gopher_parsemenu(gopherbuffer, &g); | |
+ if (l == -1) | |
+ return 1; | |
+ | |
+ l = gopher_removeentries(g, l); | |
+ | |
+ generatemap(host, port, selector, g, l); | |
+/* | |
+ rendermap(); | |
+*/ | |
+ | |
+/* | |
+ loop(); | |
+*/ | |
+ } while (0); | |
+ | |
+/* | |
+ if (tcsetattr(0, TCSANOW, &termios) == -1) | |
+ return 1; | |
+*/ | |
+ | |
+ return 0; | |
+} | |
diff --git a/gopher/gopher-clustering/gopher-clustering2.c b/gopher/gopher-clus… | |
@@ -0,0 +1,1165 @@ | |
+#define _POSIX_C_SOURCE 200112L | |
+ | |
+#include <assert.h> | |
+#include <stdarg.h> | |
+#include <stdint.h> | |
+#include <stdio.h> | |
+#include <stdlib.h> | |
+#include <string.h> | |
+ | |
+#include <math.h> | |
+ | |
+#include <netdb.h> | |
+#include <sys/socket.h> | |
+#include <sys/types.h> | |
+#include <unistd.h> | |
+ | |
+uint32_t | |
+fnv1a(int n,...) | |
+{ | |
+ int i; | |
+ char *s; | |
+ va_list l; | |
+ uint32_t h; | |
+ | |
+ h = 0x811c9dc5; | |
+ | |
+ va_start(l, n); | |
+ for (i = 0; i < n; i++) { | |
+ for (s = va_arg(l, char*); *s; s++) { | |
+ h ^= *s; | |
+ h *= 0x01000193; | |
+ } | |
+ } | |
+ va_end(l); | |
+ | |
+ return h; | |
+} | |
+ | |
+uint32_t | |
+xorshift(uint32_t *s) | |
+{ | |
+ *s ^= *s << 13; | |
+ *s ^= *s >> 17; | |
+ *s ^= *s << 5; | |
+ return *s; | |
+} | |
+ | |
+/* | |
+ https://github.com/skeeto/hash-prospector/issues/19#issuecomment-11201… | |
+*/ | |
+uint32_t | |
+mix(uint32_t x) | |
+{ | |
+ x ^= x >> 16; | |
+ x *= 0x21f0aaad; | |
+ x ^= x >> 15; | |
+ x *= 0x735a2d97; | |
+ x ^= x >> 16; | |
+ return x; | |
+} | |
+ | |
+struct gopherentry { | |
+ struct gopherentry *next; | |
+ char type; | |
+ char *fields[4]; | |
+ size_t i, s; | |
+}; | |
+ | |
+char * | |
+gopher_fieldend(char *s) | |
+{ | |
+ for (; *s; s++) { | |
+ if (*s == '\t') | |
+ break; | |
+ if (*s == '\r' && *(s+1) == '\n') | |
+ break; | |
+ if (*s == '\n') | |
+ break; | |
+ } | |
+ return s; | |
+} | |
+ | |
+char * | |
+gopher_nextentry(char *s) | |
+{ | |
+ char *c; | |
+ | |
+ if (c = strchr(s, '\n')) | |
+ return c + 1; | |
+ return NULL; | |
+} | |
+ | |
+void * | |
+xmalloc(size_t s) | |
+{ | |
+ void *p; | |
+ | |
+ if (!(p = malloc(s))) { | |
+ perror(NULL); | |
+ exit(1); | |
+ } | |
+ return p; | |
+} | |
+ | |
+void * | |
+xrealloc(void *p, size_t s) | |
+{ | |
+ if (!(p = realloc(p, s))) { | |
+ perror(NULL); | |
+ exit(1); | |
+ } | |
+ return p; | |
+} | |
+ | |
+/* Allow a lot of ugly things... */ | |
+size_t | |
+gopher_parsemenu(char *d, struct gopherentry **g) | |
+{ | |
+ ssize_t gl; | |
+ size_t fl, i, s; | |
+ char *c, *p, pt; | |
+ struct gopherentry *cg; | |
+ | |
+ gl = 0; | |
+ cg = NULL; | |
+ | |
+ s = 0; | |
+ pt = 0; | |
+ for (c = d; c && *c; c = gopher_nextentry(c)) { | |
+ if (*c == '.') | |
+ break; | |
+ if (*c == '\n') | |
+ continue; | |
+ | |
+ gl++; | |
+ cg = xrealloc(cg, gl * sizeof(struct gopherentry)); | |
+ memset(&cg[gl-1], 0, sizeof(struct gopherentry)); | |
+ | |
+ if (*c != 'i' && pt == 'i') | |
+ s++; | |
+ pt = *c; | |
+ | |
+ cg[gl-1].type = *c; | |
+ cg[gl-1].i = gl-1; | |
+ cg[gl-1].s = s; | |
+ c++; | |
+ | |
+ for (i = 0; i < 4; i++) { | |
+ p = gopher_fieldend(c); | |
+ fl = p - c; | |
+ cg[gl-1].fields[i] = xmalloc(fl + 1); | |
+ memcpy(cg[gl-1].fields[i], c, fl); | |
+ cg[gl-1].fields[i][fl] = '\0'; | |
+ if (*p != '\t') | |
+ break; | |
+ c = p + 1; | |
+ } | |
+ } | |
+ s++; | |
+ *g = cg; | |
+ return gl; | |
+} | |
+ | |
+ssize_t | |
+gopher_removeentries(struct gopherentry *g, size_t l) | |
+{ | |
+ size_t i, j; | |
+ | |
+ for (i = 0; i < l;) { | |
+ if (g[i].type == 'i' || g[i].type == '3' || !g[i].fields[2] ||… | |
+ l--; | |
+ memmove(&g[i], &g[i+1], (l - i) * sizeof(*g)); | |
+ continue; | |
+ } | |
+ for (j = 0; j < i; j++) { | |
+ if (!strcmp(g[i].fields[1], g[j].fields[1]) && | |
+ !strcmp(g[i].fields[2], g[j].fields[2]) && | |
+ !strcmp(g[i].fields[3], g[j].fields[3])) | |
+ break; | |
+ } | |
+ if (j < i) { | |
+ l--; | |
+ memmove(&g[i], &g[i+1], (l - i) * sizeof(*g)); | |
+ continue; | |
+ } | |
+ i++; | |
+ } | |
+ | |
+ return l; | |
+} | |
+ | |
+ssize_t | |
+gopher_request(char *host, char *port, char *selector, char *buffer, size_t l) | |
+{ | |
+ int fd; | |
+ struct addrinfo hints; | |
+ struct addrinfo *r, *rp; | |
+ ssize_t n, result, rn; | |
+ | |
+ memset(&hints, 0, sizeof(hints)); | |
+ hints.ai_family = AF_UNSPEC; | |
+ hints.ai_socktype = SOCK_STREAM; | |
+ | |
+ if (getaddrinfo(host, port, &hints, &r) != 0) | |
+ return -1; | |
+ | |
+ for (rp = r; rp; rp = rp->ai_next) { | |
+ if ((fd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protoc… | |
+ continue; | |
+ if (connect(fd, rp->ai_addr, rp->ai_addrlen) != -1) | |
+ break; | |
+ close(fd); | |
+ } | |
+ freeaddrinfo(r); | |
+ if (!rp) | |
+ return -1; | |
+ | |
+ result = -1; | |
+ if (write(fd, selector, strlen(selector)) != strlen(selector)) | |
+ goto cleanup; | |
+ | |
+ if (write(fd, "\r\n", 2) != 2) | |
+ goto cleanup; | |
+ | |
+ n = 0; | |
+ while (1) { | |
+ if (n + 512 > l - 1) | |
+ goto cleanup; | |
+ if ((rn = read(fd, &buffer[n], 512)) == -1) | |
+ goto cleanup; | |
+ else if (!rn) | |
+ break; | |
+ n += rn; | |
+ } | |
+ | |
+ buffer[n] = '\0'; | |
+ result = n; | |
+ | |
+cleanup: | |
+ close(fd); | |
+ | |
+ return result; | |
+} | |
+ | |
+size_t | |
+triangularnumber(size_t n) | |
+{ | |
+ return (n * n + n) / 2; | |
+} | |
+ | |
+size_t | |
+distanceindex(size_t i, size_t j) | |
+{ | |
+ size_t t; | |
+ | |
+ if (i < j) { | |
+ t = i; | |
+ i = j; | |
+ j = t; | |
+ } | |
+ | |
+ return triangularnumber(i) + j; | |
+} | |
+ | |
+struct point { | |
+ double dims[2]; | |
+}; | |
+ | |
+#define SAMMON_MAPPING_MAX_ITER (1 << 13) | |
+#define SAMMON_MAPPING_LR (0.01) | |
+void | |
+sammon(double *distances, size_t l, uint32_t prng, struct point *points) | |
+{ | |
+ size_t i, j, k, t, tmax; | |
+ double *gnarf; | |
+ double c, lr; | |
+ double e1, e2, sum, lasterror; | |
+ struct point *newpoints; | |
+ | |
+ gnarf = calloc(triangularnumber(l), sizeof(*gnarf)); | |
+ newpoints = calloc(l, sizeof(*newpoints)); | |
+ | |
+ for (i = 0; i < l; i++) { | |
+ points[i].dims[0] = (double)xorshift(&prng) / UINT32_MAX - 0.5; | |
+ points[i].dims[1] = (double)xorshift(&prng) / UINT32_MAX - 0.5; | |
+ } | |
+ | |
+ c = 0; | |
+ for (i = 0; i < l; i++) { | |
+ gnarf[distanceindex(i, i)] = 0; | |
+ for (j = 0; j < i; j++) { | |
+ gnarf[distanceindex(i, j)] = sqrt(pow(points[i].dims[0… | |
+ c += distances[distanceindex(i, j)]; | |
+ } | |
+ } | |
+ | |
+ lasterror = 0; | |
+ for (t = 0, tmax = SAMMON_MAPPING_MAX_ITER; t < tmax; t++) { | |
+ lr = SAMMON_MAPPING_LR; | |
+ for (i = 0; i < l; i++) { | |
+ for (j = 0; j < 2; j++) { | |
+ e1 = -2. / c; | |
+ sum = 0; | |
+ for (k = 0; k < l; k++) { | |
+ if (i == k) | |
+ continue; | |
+ sum += (distances[distanceindex(i, k)]… | |
+ } | |
+ e1 *= sum; | |
+ | |
+ e2 = -2. / c; | |
+ sum = 0; | |
+ for (k = 0; k < l; k++) { | |
+ if (i == k) | |
+ continue; | |
+ sum += 1. / (distances[distanceindex(i… | |
+ } | |
+ e2 *= sum; | |
+ | |
+ newpoints[i].dims[j] = points[i].dims[j] - lr … | |
+ } | |
+ } | |
+ | |
+ sum = 0; | |
+ for (i = 0; i < l; i++) { | |
+ for (j = 0; j < i; j++) { | |
+ gnarf[distanceindex(i, j)] = sqrt(pow(newpoint… | |
+ sum += pow(distances[distanceindex(i, j)] - gn… | |
+ } | |
+ } | |
+ | |
+ fprintf(stderr, "sammon mapping error: %lf\n", sum / c); | |
+ if (t > 0 && fabs(sum / c - lasterror) < 0.000000000001) | |
+ break; | |
+ lasterror = sum / c; | |
+ | |
+ memcpy(points, newpoints, l * sizeof(*points)); | |
+ } | |
+ | |
+ free(newpoints); | |
+ free(gnarf); | |
+} | |
+ | |
+int | |
+fastermsc(struct gopherentry *g, size_t l, size_t k, double *distances, size_t… | |
+{ | |
+ size_t i, j, n, ck, grimmel; | |
+ ssize_t m1, m, m2; | |
+ size_t ti, th, huh; | |
+ double dsum, d, d2, gnarf, tdsum; | |
+ struct { | |
+ size_t medoid; | |
+ size_t n; | |
+ double dsum; | |
+ } *clusters; | |
+ struct { | |
+ size_t n1, n2, n3; | |
+ double d1, d2, d3; | |
+ } *cache; | |
+ double *dS, *dS2; | |
+ double cdS, cdS2; | |
+ size_t cm, cx; | |
+ ssize_t lx; | |
+ int flag, result; | |
+ uint32_t r; | |
+ double *C; | |
+ | |
+ if (!l) | |
+ return 0; | |
+ | |
+ if (l < k) | |
+ k = l; | |
+ | |
+ clusters = calloc(k, sizeof(*clusters)); | |
+ if (!clusters) | |
+ return -1; | |
+ | |
+ result = -1; | |
+ | |
+/* | |
+ Seed the medoids randomly | |
+*/ | |
+ for (i = n = 0; i < l; i++) { | |
+ if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n) | |
+ clusters[n++].medoid = i; | |
+ if (n == k) | |
+ break; | |
+ } | |
+ | |
+ cache = calloc(l, sizeof(*cache)); | |
+ if (!cache) | |
+ goto cleanup; | |
+ | |
+ dS = calloc(k, sizeof(*dS)); | |
+ if (!dS) | |
+ goto cleanup; | |
+ | |
+ dS2 = calloc(k, sizeof(*dS2)); | |
+ if (!dS2) | |
+ goto cleanup; | |
+ | |
+ for (i = 0; i < l; i++) { | |
+ cache[i].d1 = cache[i].d2 = cache[i].d3 = INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ d = distances[distanceindex(i, clusters[j].medoid)]; | |
+ if (d < cache[i].d1) { | |
+ cache[i].d3 = cache[i].d2; | |
+ cache[i].d2 = cache[i].d1; | |
+ cache[i].d1 = d; | |
+ cache[i].n3 = cache[i].n2; | |
+ cache[i].n2 = cache[i].n1; | |
+ cache[i].n1 = j; | |
+ } else if (d < cache[i].d2) { | |
+ cache[i].d3 = cache[i].d2; | |
+ cache[i].d2 = d; | |
+ cache[i].n3 = cache[i].n2; | |
+ cache[i].n2 = j; | |
+ } else if (d < cache[i].d3) { | |
+ cache[i].d3 = d; | |
+ cache[i].n3 = j; | |
+ } | |
+ } | |
+ } | |
+ | |
+ for (i = 0; i < k; i++) { | |
+ dS[i] = 0; | |
+ for (j = 0; j < l; j++) { | |
+ if (cache[j].n1 != i) | |
+ continue; | |
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d2 / cac… | |
+ } | |
+ for (j = 0; j < l; j++) { | |
+ if (cache[j].n2 != i) | |
+ continue; | |
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d1 / cac… | |
+ } | |
+ } | |
+ | |
+ lx = -1; | |
+ for (grimmel = 0;grimmel < 32;grimmel++) { | |
+ cdS = 0; | |
+ for (i = 0; i < l; i++) { | |
+ if (i == lx) | |
+ goto fastermsc_finished; | |
+ | |
+ for (j = 0; j < k; j++) { | |
+ if (clusters[j].medoid == i) | |
+ goto fastmsc_nexti; | |
+ } | |
+ | |
+ memcpy(dS2, dS, k * sizeof(*dS2)); | |
+ cdS2 = 0; | |
+ for (j = 0; j < l; j++) { | |
+ d = distances[distanceindex(i, j)]; | |
+ if (d < cache[j].d1) { | |
+ cdS2 += cache[j].d1 / cache[j].d2 - d … | |
+ dS2[cache[j].n1] += d / cache[j].d1 + … | |
+ dS2[cache[j].n2] += cache[j].d1 / cach… | |
+ } else if (d < cache[j].d2) { | |
+ cdS2 += cache[j].d1 / cache[j].d2 - ca… | |
+ dS2[cache[j].n1] += cache[j].d1 / d + … | |
+ dS2[cache[j].n2] += cache[j].d1 / cach… | |
+ } else if (d < cache[j].d3) { | |
+ dS2[cache[j].n1] += cache[j].d2 / cach… | |
+ dS2[cache[j].n2] += cache[j].d1 / cach… | |
+ } | |
+ } | |
+ d = -INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ if (dS2[j] > d) { | |
+ d = dS2[j]; | |
+ m = j; | |
+ } | |
+ } | |
+ dS2[m] += cdS2; | |
+ if (dS2[m] <= 0) | |
+ continue; | |
+ lx = i; | |
+ clusters[m].medoid = i; | |
+ for (j = 0; j < l; j++) { | |
+ if (cache[j].n1 != m && cache[j].n2 != m && ca… | |
+ continue; | |
+ cache[j].d1 = cache[j].d2 = cache[j].d3 = INFI… | |
+ for (n = 0; n < k; n++) { | |
+ d = distances[distanceindex(j, cluster… | |
+ if (d < cache[j].d1) { | |
+ cache[j].d3 = cache[j].d2; | |
+ cache[j].d2 = cache[j].d1; | |
+ cache[j].d1 = d; | |
+ cache[j].n3 = cache[j].n2; | |
+ cache[j].n2 = cache[j].n1; | |
+ cache[j].n1 = n; | |
+ } else if (d < cache[j].d2) { | |
+ cache[j].d3 = cache[j].d2; | |
+ cache[j].d2 = d; | |
+ cache[j].n3 = cache[j].n2; | |
+ cache[j].n2 = n; | |
+ } else if (d < cache[j].d3) { | |
+ cache[j].d3 = d; | |
+ cache[j].n3 = n; | |
+ } | |
+ } | |
+ } | |
+ for (j = 0; j < k; j++) { | |
+ dS[j] = 0; | |
+ for (n = 0; n < l; n++) { | |
+ if (cache[n].n1 != j) | |
+ continue; | |
+ dS[j] += cache[n].d1 / cache[n].d2 - c… | |
+ } | |
+ for (n = 0; n < l; n++) { | |
+ if (cache[n].n2 != j) | |
+ continue; | |
+ dS[j] += cache[n].d1 / cache[n].d2 - c… | |
+ } | |
+ } | |
+fastmsc_nexti:; | |
+ } | |
+/* | |
+ if (lx == -1) | |
+ break; | |
+*/ | |
+ } | |
+fastermsc_finished:; | |
+ | |
+ for (i = 0; i < k; i++) | |
+ clusters[i].n = 0; | |
+ | |
+ for (i = 0; i < l; i++) { | |
+ d = INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ if (distances[distanceindex(clusters[j].medoid, i)] < … | |
+ assoc[i] = j; | |
+ d = distances[distanceindex(clusters[j].medoid… | |
+ } | |
+ } | |
+ clusters[assoc[i]].n++; | |
+ } | |
+ | |
+ for (i = 0; i < l; i++) { | |
+ if (clusters[assoc[i]].medoid == i) | |
+ assoc[i] |= 128; | |
+ } | |
+ | |
+ result = k; | |
+cleanup: | |
+ free(clusters); | |
+ return result; | |
+} | |
+ | |
+int | |
+fastmsc(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *… | |
+{ | |
+ size_t i, j, n, ck; | |
+ ssize_t m1, m, m2; | |
+ size_t ti, th; | |
+ double dsum, d, d2, gnarf, tdsum; | |
+ struct { | |
+ size_t medoid; | |
+ size_t n; | |
+ double dsum; | |
+ } *clusters; | |
+ struct { | |
+ size_t n1, n2; | |
+ double d1, d2, d3; | |
+ } *cache; | |
+ double *dS, *dS2; | |
+ double cdS, cdS2; | |
+ double _e, _y, _z, _s; | |
+ size_t cm, cx; | |
+ int flag, result; | |
+ uint32_t r; | |
+ double *C; | |
+ | |
+ if (!l) | |
+ return 0; | |
+ | |
+ if (l < k) | |
+ k = l; | |
+ | |
+ clusters = calloc(k, sizeof(*clusters)); | |
+ if (!clusters) | |
+ return -1; | |
+ | |
+ result = -1; | |
+ | |
+/* | |
+ seed the medoids using PAM BUILD | |
+ d = INFINITY; | |
+ m = -1; | |
+ for (i = 0; i < l; i++) { | |
+ dsum = 0; | |
+ for (j = 0; j < l; j++) | |
+ dsum += distances[distanceindex(i, j)]; | |
+ if (dsum < d) { | |
+ m = i; | |
+ d = dsum; | |
+ } | |
+ } | |
+ if (m == -1) | |
+ goto cleanup; | |
+ ck = 0; | |
+ clusters[ck].medoid = m; | |
+ ck++; | |
+ | |
+ C = xmalloc(l * l * sizeof(*C)); | |
+ | |
+ for (; ck < k; ck++) { | |
+ memset(C, 0, l * l * sizeof(*C)); | |
+ for (i = 0; i < l; i++) { | |
+ for (j = 0; j < ck; j++) { | |
+ if (clusters[j].medoid == i) | |
+ goto build_nexti; | |
+ } | |
+ for (j = 0; j < l; j++) { | |
+ if (i == j) | |
+ continue; | |
+ for (n = 0; n < ck; n++) { | |
+ if (clusters[n].medoid == j) | |
+ goto build_nextj; | |
+ } | |
+ d = INFINITY; | |
+ for (n = 0; n < ck; n++) { | |
+ if (distances[distanceindex(j, cluster… | |
+ d = distances[distanceindex(j,… | |
+ } | |
+ C[j * l + i] = d - distances[distanceindex(j, … | |
+ if (C[j * l + i] < 0) | |
+ C[j * l + i] = 0; | |
+build_nextj:; | |
+ } | |
+build_nexti:; | |
+ } | |
+ d = -1; | |
+ m = -1; | |
+ for (i = 0; i < l; i++) { | |
+ for (j = 0; j < ck; j++) { | |
+ if (clusters[j].medoid == i) | |
+ goto build_nexti2; | |
+ } | |
+ dsum = 0; | |
+ for (j = 0; j < l; j++) | |
+ dsum += C[j * l + i]; | |
+ if (dsum > d) { | |
+ d = dsum; | |
+ m = i; | |
+ } | |
+build_nexti2:; | |
+ } | |
+ clusters[ck].medoid = m; | |
+ } | |
+ free(C); | |
+*/ | |
+ | |
+/* | |
+ Seed the medoids randomly | |
+*/ | |
+ for (i = n = 0; i < l; i++) { | |
+ if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n) | |
+ clusters[n++].medoid = i; | |
+ if (n == k) | |
+ break; | |
+ } | |
+ | |
+ cache = calloc(l, sizeof(*cache)); | |
+ if (!cache) | |
+ goto cleanup; | |
+ | |
+ dS = calloc(k, sizeof(*dS)); | |
+ if (!dS) | |
+ goto cleanup; | |
+ | |
+ dS2 = calloc(k, sizeof(*dS2)); | |
+ if (!dS2) | |
+ goto cleanup; | |
+ | |
+ for (;;) { | |
+ for (i = 0; i < l; i++) { | |
+ cache[i].d1 = cache[i].d2 = cache[i].d3 = INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ d = distances[distanceindex(i, clusters[j].med… | |
+ if (d < cache[i].d1) { | |
+ cache[i].d3 = cache[i].d2; | |
+ cache[i].d2 = cache[i].d1; | |
+ cache[i].d1 = d; | |
+ cache[i].n2 = cache[i].n1; | |
+ cache[i].n1 = j; | |
+ } else if (d < cache[i].d2) { | |
+ cache[i].d3 = cache[i].d2; | |
+ cache[i].d2 = d; | |
+ cache[i].n2 = j; | |
+ } else if (d < cache[i].d3) { | |
+ cache[i].d3 = d; | |
+ } | |
+ } | |
+ } | |
+ | |
+ for (i = 0; i < k; i++) { | |
+ dS[i] = 0; | |
+ for (j = 0; j < l; j++) { | |
+ if (cache[j].n1 != i) | |
+ continue; | |
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].… | |
+ } | |
+ for (j = 0; j < l; j++) { | |
+ if (cache[j].n2 != i) | |
+ continue; | |
+ dS[i] += cache[j].d1 / cache[j].d2 - cache[j].… | |
+ } | |
+ } | |
+ | |
+ cdS = 0; | |
+ for (i = 0; i < l; i++) { | |
+ for (j = 0; j < k; j++) { | |
+ if (clusters[j].medoid == i) | |
+ goto fastmsc_nexti; | |
+ } | |
+ memcpy(dS2, dS, k * sizeof(*dS2)); | |
+ cdS2 = _e = 0; | |
+ for (j = 0; j < l; j++) { | |
+ d = distances[distanceindex(i, j)]; | |
+ if (d < cache[j].d1) { | |
+ cdS2 += cache[j].d1 / cache[j].d2 - d … | |
+ dS2[cache[j].n1] += d / cache[j].d1 + … | |
+ dS2[cache[j].n2] += cache[j].d1 / cach… | |
+ } else if (d < cache[j].d2) { | |
+ cdS2 += cache[j].d1 / cache[j].d2 - ca… | |
+ dS2[cache[j].n1] += cache[j].d1 / d + … | |
+ dS2[cache[j].n2] += cache[j].d1 / cach… | |
+ } else if (d < cache[j].d3) { | |
+ dS2[cache[j].n1] += cache[j].d2 / cach… | |
+ dS2[cache[j].n2] += cache[j].d1 / cach… | |
+ } | |
+ } | |
+ d = -INFINITY; | |
+ d2 = INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ if (dS2[j] > d) { | |
+ d = dS2[j]; | |
+ m = j; | |
+ } | |
+ if (dS2[j] < d2) { | |
+ d2 = dS2[j]; | |
+ m2 = j; | |
+ } | |
+ } | |
+ dS2[m] += cdS2; | |
+ if (dS2[m] > cdS) { | |
+ cdS = dS2[m]; | |
+ cm = m; | |
+ cx = i; | |
+ } | |
+fastmsc_nexti:; | |
+ } | |
+ /* Should be 0. I guess some floating point problems? */ | |
+ if (cdS <= 0.00000000000001) | |
+ break; | |
+ clusters[cm].medoid = cx; | |
+ } | |
+ | |
+ for (i = 0; i < k; i++) | |
+ clusters[i].n = 0; | |
+ | |
+ for (i = 0; i < l; i++) { | |
+ d = INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ if (distances[distanceindex(clusters[j].medoid, i)] < … | |
+ assoc[i] = j; | |
+ d = distances[distanceindex(clusters[j].medoid… | |
+ } | |
+ } | |
+ clusters[assoc[i]].n++; | |
+ } | |
+ | |
+ for (i = 0; i < l; i++) { | |
+ if (clusters[assoc[i]].medoid == i) | |
+ assoc[i] |= 128; | |
+ } | |
+ | |
+ result = k; | |
+cleanup: | |
+ free(clusters); | |
+ return result; | |
+} | |
+ | |
+/* | |
+ Seeding of medoids based on the PAM BUILD description in https://www.c… | |
+ I was too stupid to understand other descriptions. | |
+*/ | |
+int | |
+kmedoids(struct gopherentry *g, size_t l, size_t k, double *distances, size_t … | |
+{ | |
+ size_t i, j, n, ck; | |
+ ssize_t m1, m; | |
+ size_t ti, th; | |
+ double dsum, d, d2, gnarf, tdsum; | |
+ struct { | |
+ size_t medoid; | |
+ size_t n; | |
+ double dsum; | |
+ } *clusters; | |
+ int flag, result; | |
+ uint32_t r; | |
+ double *C; | |
+ | |
+ if (!l) | |
+ return 0; | |
+ | |
+ if (l < k) | |
+ k = l; | |
+ | |
+ clusters = calloc(k, sizeof(*clusters)); | |
+ if (!clusters) | |
+ return -1; | |
+ | |
+ result = -1; | |
+ | |
+ /* seed the medoids using PAM BUILD */ | |
+ d = INFINITY; | |
+ m = -1; | |
+ for (i = 0; i < l; i++) { | |
+ dsum = 0; | |
+ for (j = 0; j < l; j++) | |
+ dsum += distances[distanceindex(i, j)]; | |
+ if (dsum < d) { | |
+ m = i; | |
+ d = dsum; | |
+ } | |
+ } | |
+ if (m == -1) | |
+ goto cleanup; | |
+ ck = 0; | |
+ clusters[ck].medoid = m; | |
+ ck++; | |
+ | |
+ C = xmalloc(l * l * sizeof(*C)); | |
+ | |
+ for (; ck < k; ck++) { | |
+ memset(C, 0, l * l * sizeof(*C)); | |
+ for (i = 0; i < l; i++) { | |
+ for (j = 0; j < ck; j++) { | |
+ if (clusters[j].medoid == i) | |
+ goto build_nexti; | |
+ } | |
+ for (j = 0; j < l; j++) { | |
+ if (i == j) | |
+ continue; | |
+ for (n = 0; n < ck; n++) { | |
+ if (clusters[n].medoid == j) | |
+ goto build_nextj; | |
+ } | |
+ d = INFINITY; | |
+ for (n = 0; n < ck; n++) { | |
+ if (distances[distanceindex(j, cluster… | |
+ d = distances[distanceindex(j,… | |
+ } | |
+ C[j * l + i] = d - distances[distanceindex(j, … | |
+ if (C[j * l + i] < 0) | |
+ C[j * l + i] = 0; | |
+build_nextj:; | |
+ } | |
+build_nexti:; | |
+ } | |
+ d = -1; | |
+ m = -1; | |
+ for (i = 0; i < l; i++) { | |
+ for (j = 0; j < ck; j++) { | |
+ if (clusters[j].medoid == i) | |
+ goto build_nexti2; | |
+ } | |
+ dsum = 0; | |
+ for (j = 0; j < l; j++) | |
+ dsum += C[j * l + i]; | |
+ if (dsum > d) { | |
+ d = dsum; | |
+ m = i; | |
+ } | |
+build_nexti2:; | |
+ } | |
+ clusters[ck].medoid = m; | |
+ } | |
+ free(C); | |
+ | |
+ for (;;) { | |
+ tdsum = INFINITY; | |
+ for (i = 0; i < k; i++) { | |
+ for (j = 0; j < l; j++) { | |
+ for (n = 0; n < k; n++) { | |
+ if (j == clusters[n].medoid) | |
+ goto swap_nextj; | |
+ } | |
+ dsum = 0; | |
+ for (n = 0; n < l; n++) { | |
+ if (n == j) | |
+ continue; | |
+ for (m = 0; m < k; m++) { | |
+ if (clusters[m].medoid == n) | |
+ goto swap_nextn; | |
+ } | |
+ d = INFINITY; | |
+ d2 = INFINITY; | |
+ for (m = 0; m < k; m++) { | |
+ if (distances[distanceindex(cl… | |
+ d2 = d; | |
+ d = distances[distance… | |
+ } else if (distances[distancei… | |
+ d2 = distances[distanc… | |
+ } | |
+ } | |
+ if (distances[distanceindex(clusters[i… | |
+ if (distances[distanceindex(j,… | |
+ gnarf = 0; | |
+ else | |
+ gnarf = distances[dist… | |
+ } else if (distances[distanceindex(clu… | |
+ if (distances[distanceindex(j,… | |
+ gnarf = distances[dist… | |
+ else | |
+ gnarf = d2 - d; | |
+ } else { | |
+ continue; | |
+ } | |
+ dsum += gnarf; | |
+swap_nextn:; | |
+ } | |
+ if (dsum < tdsum) { | |
+ ti = i; | |
+ th = j; | |
+ tdsum = dsum; | |
+ } | |
+swap_nextj:; | |
+ } | |
+ } | |
+ if (tdsum >= 0) | |
+ break; | |
+ clusters[ti].medoid = th; | |
+ } | |
+ | |
+ for (i = 0; i < k; i++) | |
+ clusters[i].n = 0; | |
+ | |
+ for (i = 0; i < l; i++) { | |
+ d = INFINITY; | |
+ for (j = 0; j < k; j++) { | |
+ if (distances[distanceindex(clusters[j].medoid, i)] < … | |
+ assoc[i] = j; | |
+ d = distances[distanceindex(clusters[j].medoid… | |
+ } | |
+ } | |
+ clusters[assoc[i]].n++; | |
+ } | |
+ | |
+ for (i = 0; i < l; i++) { | |
+ if (clusters[assoc[i]].medoid == i) | |
+ assoc[i] |= 128; | |
+ } | |
+ | |
+ result = k; | |
+cleanup: | |
+ free(clusters); | |
+ return result; | |
+} | |
+ | |
+size_t | |
+diff(size_t a, size_t b) | |
+{ | |
+ if (a < b) | |
+ return b - a; | |
+ return a - b; | |
+} | |
+ | |
+/* | |
+ https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_two_… | |
+ Which references https://www.codeproject.com/Articles/13525/Fast-memor… | |
+ The code there is using the CPOL: https://www.codeproject.com/info/cpo… | |
+*/ | |
+size_t | |
+levenshteindistance(char *a, char *b) | |
+{ | |
+ size_t i, j, dc, ic, sc, r; | |
+ size_t *v0, *v1, *t; | |
+ | |
+ v0 = xmalloc((strlen(b) + 1) * sizeof(*v0)); | |
+ v1 = xmalloc((strlen(b) + 1) * sizeof(*v1)); | |
+ | |
+ for (i = 0; i <= strlen(b); i++) | |
+ v0[i] = i; | |
+ | |
+ for (i = 0; i < strlen(a); i++) { | |
+ v1[0] = i + 1; | |
+ for (j = 0; j < strlen(b); j++) { | |
+ dc = v0[j + 1] + 1; | |
+ ic = v1[j] + 1; | |
+ if (a[i] == b[j]) | |
+ sc = v0[j]; | |
+ else | |
+ sc = v0[j] + 1; | |
+ if (dc < ic && dc < sc) | |
+ v1[j + 1] = dc; | |
+ else if (ic < dc && ic < sc) | |
+ v1[j + 1] = ic; | |
+ else | |
+ v1[j + 1] = sc; | |
+ } | |
+ t = v0; | |
+ v0 = v1; | |
+ v1 = t; | |
+ } | |
+ | |
+ r = v0[strlen(b)]; | |
+ | |
+ free(v1); | |
+ free(v0); | |
+ | |
+ return r; | |
+} | |
+ | |
+/* | |
+ Remove common prefix and suffix. The pazz0distance is the sum of the l… | |
+*/ | |
+size_t | |
+pazz0distance(char *a, char *b) | |
+{ | |
+ size_t pl, sl; | |
+ size_t al, bl; | |
+ | |
+ al = strlen(a); | |
+ bl = strlen(b); | |
+ | |
+ for (pl = 0; pl < al && pl < bl && a[pl] == b[pl]; pl++); | |
+ for (sl = 0; al - sl > pl && bl - sl > pl && a[al - 1 - sl] == b[bl - … | |
+ | |
+ return al + bl - 2 * (pl + sl); | |
+} | |
+ | |
+/* | |
+ https://en.wikipedia.org/wiki/Levenshtein_distance#Recursive | |
+*/ | |
+size_t | |
+levenshteindistance_slow(char *a, char *b) | |
+{ | |
+ size_t k, l, m; | |
+ | |
+ if (!*a) | |
+ return strlen(b); | |
+ else if (!*b) | |
+ return strlen(a); | |
+ else if (*a == *b) | |
+ return levenshteindistance_slow(a + 1, b + 1); | |
+ | |
+ k = levenshteindistance_slow(a + 1, b); | |
+ l = levenshteindistance_slow(a, b + 1); | |
+ m = levenshteindistance_slow(a + 1, b + 1); | |
+ | |
+ if (k < l && k < m) | |
+ return 1 + k; | |
+ else if (l < k && l < m) | |
+ return 1 + l; | |
+ else | |
+ return 1 + m; | |
+} | |
+ | |
+ssize_t | |
+clustering(char *host, char *port, char *selector, struct gopherentry *g, size… | |
+{ | |
+ size_t i, j; | |
+ ssize_t r; | |
+ double *distances, d; | |
+ double *mdists; | |
+ struct { | |
+ double selector; | |
+ double segment; | |
+ double index; | |
+ } *distanceparts, maxdistanceparts; | |
+ uint32_t prng; | |
+ struct point *points; | |
+ | |
+ distanceparts = xmalloc(triangularnumber(l) * sizeof(*distanceparts)); | |
+ | |
+ /* So the sammon mapping can be happy. */ | |
+ prng = fnv1a(4, host, port, selector, "distancewiggle"); | |
+ memset(&maxdistanceparts, 0, sizeof(maxdistanceparts)); | |
+ for (i = 0; i < l; i++) { | |
+ memset(&distanceparts[distanceindex(i, i)], 0, sizeof(*distanc… | |
+ for (j = 0; j < i; j++) { | |
+ distanceparts[distanceindex(i, j)].selector = levensht… | |
+ distanceparts[distanceindex(i, j)].segment = diff(g[i]… | |
+ distanceparts[distanceindex(i, j)].index = diff(g[i].i… | |
+ | |
+ if (distanceparts[distanceindex(i, j)].selector > maxd… | |
+ maxdistanceparts.selector = distanceparts[dist… | |
+ if (distanceparts[distanceindex(i, j)].segment > maxdi… | |
+ maxdistanceparts.segment = distanceparts[dista… | |
+ if (distanceparts[distanceindex(i, j)].index > maxdist… | |
+ maxdistanceparts.index = distanceparts[distanc… | |
+ } | |
+ } | |
+ for (i = 0; i < l; i++) { | |
+ for (j = 0; j < i; j++) { | |
+ distanceparts[distanceindex(i, j)].selector /= maxdist… | |
+ distanceparts[distanceindex(i, j)].segment /= maxdista… | |
+ distanceparts[distanceindex(i, j)].index /= maxdistanc… | |
+ } | |
+ } | |
+ | |
+ distances = xmalloc(triangularnumber(l) * sizeof(*distances)); | |
+ for (i = 0; i < l; i++) { | |
+ distances[distanceindex(i, i)] = 0; | |
+ for (j = 0; j < i; j++) | |
+ distances[distanceindex(i, j)] = 0.75 * distanceparts[… | |
+ } | |
+ free(distanceparts); | |
+ | |
+ r = kmedoids(g, l, k, distances, assocs, fnv1a(4, host, port, selector… | |
+ | |
+ points = calloc(l, sizeof(*points)); | |
+ sammon(distances, l, fnv1a(4, host, port, selector, "sammonseed"), poi… | |
+ | |
+ for (i = 0; i < l; i++) | |
+ printf("%lf\t%lf\t%ld\t%lu\t%s\n", points[i].dims[0], points[i… | |
+ | |
+ free(points); | |
+ free(distances); | |
+ | |
+ return r; | |
+} | |
+ | |
+char gopherbuffer[16 * 1024 * 1024]; | |
+ | |
+int | |
+main(int argc, char *argv[]) | |
+{ | |
+ ssize_t l; | |
+ size_t k; | |
+ struct gopherentry *g; | |
+ char *host, *port, *selector; | |
+ size_t *cassocs; | |
+ | |
+ if (argc < 2) { | |
+ printf("%s host [port [selector]]\n", argv[0]); | |
+ return 1; | |
+ } | |
+ | |
+ host = argv[1]; | |
+ port = (argc > 2) ? argv[2] : "70"; | |
+ selector = (argc > 3) ? argv[3] : ""; | |
+ | |
+ l = gopher_request(host, port, selector, gopherbuffer, sizeof(gopherbu… | |
+ if (l == -1) | |
+ return 1; | |
+ | |
+ l = gopher_parsemenu(gopherbuffer, &g); | |
+ if (l == -1) | |
+ return 1; | |
+ | |
+ l = gopher_removeentries(g, l); | |
+ | |
+ cassocs = calloc(l, sizeof(*cassocs)); | |
+ if (!cassocs) | |
+ return 1; | |
+ k = 5; | |
+ clustering(host, port, selector, g, l, cassocs, k); | |
+ | |
+ return 0; | |
+} | |
diff --git a/gopher/gopher-clustering/hint.md b/gopher/gopher-clustering/hint.md | |
@@ -0,0 +1,9 @@ | |
+ | |
+14:28:09 pazz0 | `egcc -Ofast -o gopher-clustering | |
+gopher-clustering.c -lm` | |
+14:28:19 pazz0 | `./gopher-clustering bitreich.org 70 /index.gph | |
+| sed '1,/^Silh/d' > clustering-output` | |
+14:28:34 pazz0 | And in gnuplot: `plot 'clustering-output' using | |
+1:2:5:(4+$4):3 with labels point pt var lc | |
+ | var` | |
+ |