Use one listening socket for all threads - quark - quark web server | |
git clone git://git.suckless.org/quark | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit 7814309e9a2c386f646892403e53f2a87b929b0c | |
parent e2463e733e4880c1d8034e3b90072825509ceb69 | |
Author: Laslo Hunhold <[email protected]> | |
Date: Mon, 22 Feb 2021 18:39:49 +0100 | |
Use one listening socket for all threads | |
Previously, we employed SO_REUSEPORT and bound as many listening | |
sockets as we had threads. Inside the kernel, this creates separate | |
queues for each socket and a hash-based-round-robin distributes incoming | |
requests evenly among the listening sockets. | |
As a result, the load was well-balanced among threads, in contrast to | |
sharing one listening socket (and one shared queue), because when | |
epoll() registers new incoming connections, it takes the last-activated | |
thread (FIFO). As a result, only one thread usually gets most of the | |
load, unless the server is really stressed. | |
So why change it back? The reason is latency. It's not a coincidence | |
that when you're at the supermarket you very often see another queue | |
pass along much faster than you. Even though congestion (i.e. a slow | |
cashier or customer) is completely random and no cashier-post is | |
favoured, all people in the congested queue are affected (the chance | |
is 2/3 to be in a slower queue when there are 3 queues). | |
A much more efficient approach is to have one shared queue and 3 | |
cashiers. Even when there's congestion at one cashier-post, the others | |
will continue processing customers and the overall latency is much | |
more consistent. | |
The same applies to the connection-queue: If a thread is really busy | |
with a request, the waiting connections will be affected without the | |
possibility of being processed by another idle thread. | |
Another reason is efficiency: The hash-based-round-robin in the kernel | |
adds a small overhead that can be avoided this way. The change also removes | |
complexity in the code and undoes the inconsistent "hack" where we | |
just gave each thread the same listening socket when we were working | |
with a UNIX-domain socket. | |
This change adds a small "regression": When quark is hammered with a | |
lot of connections (>15k/s), it is more inclined to drop connections | |
because epoll has the aforementioned FIFO-behaviour (i.e. it tends to | |
pass the connection-events to the same thread), which usually accepts | |
before processing read/write-related events. This in turn exhausts the | |
thread's connection-pool at some point, but because it handles that | |
well, we never enter a DoS-state. | |
This commit is a preparation for upcoming changes. | |
Signed-off-by: Laslo Hunhold <[email protected]> | |
Diffstat: | |
M main.c | 36 ++++++++++++++++++++---------… | |
M server.c | 4 ++-- | |
M server.h | 2 +- | |
M sock.c | 122 +++++++++++------------------… | |
M sock.h | 4 ++-- | |
5 files changed, 69 insertions(+), 99 deletions(-) | |
--- | |
diff --git a/main.c b/main.c | |
@@ -72,7 +72,7 @@ main(int argc, char *argv[]) | |
.docindex = "index.html", | |
}; | |
size_t i; | |
- int *insock = NULL, status = 0; | |
+ int insock, status = 0; | |
const char *err; | |
char *tok[4]; | |
@@ -214,21 +214,31 @@ main(int argc, char *argv[]) | |
} | |
} | |
- /* create a nonblocking listening socket for each thread */ | |
- if (!(insock = reallocarray(insock, nthreads, sizeof(*insock)))) { | |
- die("reallocarray:"); | |
- } | |
- if (udsname ? sock_get_uds_arr(udsname, pwd->pw_uid, grp->gr_gid, | |
- insock, nthreads) : | |
- sock_get_ips_arr(srv.host, srv.port, insock, nthreads)) { | |
+ /* | |
+ * create the (non-blocking) listening socket | |
+ * | |
+ * we could use SO_REUSEPORT and create a listening socket for | |
+ * each thread (for better load-balancing, given each thread | |
+ * would get his own kernel-queue), but this increases latency | |
+ * (as a thread might get stuck on a larger request, making all | |
+ * other requests wait in line behind it). | |
+ * | |
+ * socket contention with a single listening socket is a | |
+ * non-issue and thread-load-balancing is better fixed in the | |
+ * kernel by changing epoll-scheduling from a FIFO- to a | |
+ * LIFO-model, especially as it doesn't affect performance | |
+ */ | |
+ insock = udsname ? sock_get_uds(udsname, pwd->pw_uid, grp->gr_gid) : | |
+ sock_get_ips(srv.host, srv.port); | |
+ if (sock_set_nonblocking(insock)) { | |
return 1; | |
} | |
- for (i = 0; i < nthreads; i++) { | |
- if (sock_set_nonblocking(insock[i])) { | |
- return 1; | |
- } | |
- } | |
+ /* | |
+ * before dropping privileges, we fork, as we need to remove | |
+ * the UNIX-domain socket when we shut down, which we need | |
+ * privileges for | |
+ */ | |
switch (fork()) { | |
case -1: | |
warn("fork:"); | |
diff --git a/server.c b/server.c | |
@@ -134,7 +134,7 @@ server_worker(void *data) | |
} | |
void | |
-server_init_thread_pool(int *insock, size_t nthreads, size_t nslots, | |
+server_init_thread_pool(int insock, size_t nthreads, size_t nslots, | |
const struct server *srv) | |
{ | |
pthread_t *thread = NULL; | |
@@ -146,7 +146,7 @@ server_init_thread_pool(int *insock, size_t nthreads, size_… | |
die("reallocarray:"); | |
} | |
for (i = 0; i < nthreads; i++) { | |
- d[i].insock = insock[i]; | |
+ d[i].insock = insock; | |
d[i].nslots = nslots; | |
d[i].srv = srv; | |
} | |
diff --git a/server.h b/server.h | |
@@ -30,6 +30,6 @@ struct server { | |
size_t map_len; | |
}; | |
-void server_init_thread_pool(int *, size_t, size_t, const struct server *); | |
+void server_init_thread_pool(int, size_t, size_t, const struct server *); | |
#endif /* SERVER_H */ | |
diff --git a/sock.c b/sock.c | |
@@ -18,8 +18,7 @@ | |
#include "util.h" | |
int | |
-sock_get_ips_arr(const char *host, const char* port, int *sockfd, | |
- size_t sockfdlen) | |
+sock_get_ips(const char *host, const char* port) | |
{ | |
struct addrinfo hints = { | |
.ai_flags = AI_NUMERICSERV, | |
@@ -27,135 +26,96 @@ sock_get_ips_arr(const char *host, const char* port, int *… | |
.ai_socktype = SOCK_STREAM, | |
}; | |
struct addrinfo *ai, *p; | |
- int r; | |
- size_t i, j; | |
+ int ret, insock = 0; | |
- if ((r = getaddrinfo(host, port, &hints, &ai))) { | |
- warn("getaddrinfo: %s", gai_strerror(r)); | |
- return 1; | |
+ if ((ret = getaddrinfo(host, port, &hints, &ai))) { | |
+ die("getaddrinfo: %s", gai_strerror(ret)); | |
} | |
for (p = ai; p; p = p->ai_next) { | |
- /* try generating sockfds */ | |
- for (i = 0; i < sockfdlen; i++) { | |
- if ((sockfd[i] = socket(p->ai_family, p->ai_socktype, | |
- p->ai_protocol)) < 0) { | |
- /* retry with the next addrinfo */ | |
- break; | |
- } | |
- | |
- /* | |
- * set SO_REUSEPORT, so it becomes possible to bind | |
- * to the same port with multiple sockets, which | |
- * is what we're doing here | |
- */ | |
- if (setsockopt(sockfd[i], SOL_SOCKET, SO_REUSEPORT, | |
- &(int){1}, sizeof(int)) < 0) { | |
- warn("setsockopt:"); | |
- return 1; | |
- } | |
- | |
- if (bind(sockfd[i], p->ai_addr, p->ai_addrlen) < 0) { | |
- /* bind failed, close all previous fd's and re… | |
- for (j = 0; j <= i; j++) { | |
- if (close(sockfd[i]) < 0) { | |
- warn("close:"); | |
- return 1; | |
- } | |
- } | |
- break; | |
- } | |
+ if ((insock = socket(p->ai_family, p->ai_socktype, | |
+ p->ai_protocol)) < 0) { | |
+ continue; | |
+ } | |
+ if (setsockopt(insock, SOL_SOCKET, SO_REUSEADDR, | |
+ &(int){1}, sizeof(int)) < 0) { | |
+ die("setsockopt:"); | |
} | |
- if (i == sockfdlen) { | |
- /* we have generated all requested fds */ | |
- break; | |
+ if (bind(insock, p->ai_addr, p->ai_addrlen) < 0) { | |
+ /* bind failed, close the insock and retry */ | |
+ if (close(insock) < 0) { | |
+ die("close:"); | |
+ } | |
+ continue; | |
} | |
+ break; | |
} | |
freeaddrinfo(ai); | |
if (!p) { | |
/* we exhaustet the addrinfo-list and found no connection */ | |
if (errno == EACCES) { | |
- warn("You need to run as root or have " | |
- "CAP_NET_BIND_SERVICE set to bind to " | |
- "privileged ports"); | |
+ die("You need to run as root or have " | |
+ "CAP_NET_BIND_SERVICE set to bind to " | |
+ "privileged ports"); | |
} else { | |
- warn("bind:"); | |
+ die("bind:"); | |
} | |
- return 1; | |
} | |
- for (i = 0; i < sockfdlen; i++) { | |
- if (listen(sockfd[i], SOMAXCONN) < 0) { | |
- warn("listen:"); | |
- return 1; | |
- } | |
+ if (listen(insock, SOMAXCONN) < 0) { | |
+ die("listen:"); | |
} | |
- return 0; | |
-} | |
- | |
-void | |
-sock_rem_uds(const char *udsname) | |
-{ | |
- if (unlink(udsname) < 0) { | |
- die("unlink '%s':", udsname); | |
- } | |
+ return insock; | |
} | |
int | |
-sock_get_uds_arr(const char *udsname, uid_t uid, gid_t gid, int *sockfd, | |
- size_t sockfdlen) | |
+sock_get_uds(const char *udsname, uid_t uid, gid_t gid) | |
{ | |
struct sockaddr_un addr = { | |
.sun_family = AF_UNIX, | |
}; | |
- size_t udsnamelen, i; | |
+ size_t udsnamelen; | |
int insock, sockmode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | | |
S_IROTH | S_IWOTH; | |
if ((insock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) { | |
- warn("socket:"); | |
- return 1; | |
+ die("socket:"); | |
} | |
if ((udsnamelen = strlen(udsname)) > sizeof(addr.sun_path) - 1) { | |
- warn("UNIX-domain socket name truncated"); | |
- return 1; | |
+ die("UNIX-domain socket name truncated"); | |
} | |
memcpy(addr.sun_path, udsname, udsnamelen + 1); | |
if (bind(insock, (const struct sockaddr *)&addr, sizeof(addr)) < 0) { | |
- warn("bind '%s':", udsname); | |
- return 1; | |
+ die("bind '%s':", udsname); | |
} | |
if (listen(insock, SOMAXCONN) < 0) { | |
sock_rem_uds(udsname); | |
- warn("listen:"); | |
- return 1; | |
+ die("listen:"); | |
} | |
if (chmod(udsname, sockmode) < 0) { | |
sock_rem_uds(udsname); | |
- warn("chmod '%s':", udsname); | |
- return 1; | |
+ die("chmod '%s':", udsname); | |
} | |
if (chown(udsname, uid, gid) < 0) { | |
sock_rem_uds(udsname); | |
- warn("chown '%s':", udsname); | |
- return 1; | |
+ die("chown '%s':", udsname); | |
} | |
- for (i = 0; i < sockfdlen; i++) { | |
- /* | |
- * we can't bind to an AF_UNIX socket more than once, | |
- * so we just reuse the same fd on all threads. | |
- */ | |
- sockfd[i] = insock; | |
- } | |
+ return insock; | |
+} | |
- return 0; | |
+void | |
+sock_rem_uds(const char *udsname) | |
+{ | |
+ if (unlink(udsname) < 0) { | |
+ die("unlink '%s':", udsname); | |
+ } | |
} | |
int | |
diff --git a/sock.h b/sock.h | |
@@ -6,9 +6,9 @@ | |
#include <sys/socket.h> | |
#include <sys/types.h> | |
-int sock_get_ips_arr(const char *, const char *, int *, size_t); | |
+int sock_get_ips(const char *, const char *); | |
+int sock_get_uds(const char *, uid_t, gid_t); | |
void sock_rem_uds(const char *); | |
-int sock_get_uds_arr(const char *, uid_t, gid_t, int *, size_t); | |
int sock_set_timeout(int, int); | |
int sock_set_nonblocking(int); | |
int sock_get_inaddr_str(const struct sockaddr_storage *, char *, size_t); |