/*      $NetBSD: bufferevent_sock.c,v 1.1.1.4 2021/04/07 02:43:13 christos Exp $        */
/*
* Copyright (c) 2007-2012 Niels Provos and Nick Mathewson
* Copyright (c) 2002-2006 Niels Provos <[email protected]>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
*    derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "event2/event-config.h"
#include <sys/cdefs.h>
__RCSID("$NetBSD: bufferevent_sock.c,v 1.1.1.4 2021/04/07 02:43:13 christos Exp $");
#include "evconfig-private.h"

#include <sys/types.h>

#ifdef EVENT__HAVE_SYS_TIME_H
#include <sys/time.h>
#endif

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef EVENT__HAVE_STDARG_H
#include <stdarg.h>
#endif
#ifdef EVENT__HAVE_UNISTD_H
#include <unistd.h>
#endif

#ifdef _WIN32
#include <winsock2.h>
#include <ws2tcpip.h>
#endif

#ifdef EVENT__HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#ifdef EVENT__HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#ifdef EVENT__HAVE_NETINET_IN6_H
#include <netinet/in6.h>
#endif

#include "event2/util.h"
#include "event2/bufferevent.h"
#include "event2/buffer.h"
#include "event2/bufferevent_struct.h"
#include "event2/bufferevent_compat.h"
#include "event2/event.h"
#include "log-internal.h"
#include "mm-internal.h"
#include "bufferevent-internal.h"
#include "util-internal.h"
#ifdef _WIN32
#include "iocp-internal.h"
#endif

/* prototypes */
static int be_socket_enable(struct bufferevent *, short);
static int be_socket_disable(struct bufferevent *, short);
static void be_socket_destruct(struct bufferevent *);
static int be_socket_flush(struct bufferevent *, short, enum bufferevent_flush_mode);
static int be_socket_ctrl(struct bufferevent *, enum bufferevent_ctrl_op, union bufferevent_ctrl_data *);

static void be_socket_setfd(struct bufferevent *, evutil_socket_t);

const struct bufferevent_ops bufferevent_ops_socket = {
       "socket",
       evutil_offsetof(struct bufferevent_private, bev),
       be_socket_enable,
       be_socket_disable,
       NULL, /* unlink */
       be_socket_destruct,
       bufferevent_generic_adj_existing_timeouts_,
       be_socket_flush,
       be_socket_ctrl,
};

const struct sockaddr*
bufferevent_socket_get_conn_address_(struct bufferevent *bev)
{
       struct bufferevent_private *bev_p = BEV_UPCAST(bev);
       return (struct sockaddr *)&bev_p->conn_address;
}

void
bufferevent_socket_set_conn_address_fd_(struct bufferevent *bev,
       evutil_socket_t fd)
{
       struct bufferevent_private *bev_p = BEV_UPCAST(bev);

       socklen_t len = sizeof(bev_p->conn_address);

       struct sockaddr *addr = (struct sockaddr *)&bev_p->conn_address;
       if (addr->sa_family != AF_UNSPEC)
               getpeername(fd, addr, &len);
}

void
bufferevent_socket_set_conn_address_(struct bufferevent *bev,
       struct sockaddr *addr, size_t addrlen)
{
       struct bufferevent_private *bev_p = BEV_UPCAST(bev);
       EVUTIL_ASSERT(addrlen <= sizeof(bev_p->conn_address));
       memcpy(&bev_p->conn_address, addr, addrlen);
}

static void
bufferevent_socket_outbuf_cb(struct evbuffer *buf,
   const struct evbuffer_cb_info *cbinfo,
   void *arg)
{
       struct bufferevent *bufev = arg;
       struct bufferevent_private *bufev_p = BEV_UPCAST(bufev);

       if (cbinfo->n_added &&
           (bufev->enabled & EV_WRITE) &&
           !event_pending(&bufev->ev_write, EV_WRITE, NULL) &&
           !bufev_p->write_suspended) {
               /* Somebody added data to the buffer, and we would like to
                * write, and we were not writing.  So, start writing. */
               if (bufferevent_add_event_(&bufev->ev_write, &bufev->timeout_write) == -1) {
                   /* Should we log this? */
               }
       }
}

static void
bufferevent_readcb(evutil_socket_t fd, short event, void *arg)
{
       struct bufferevent *bufev = arg;
       struct bufferevent_private *bufev_p = BEV_UPCAST(bufev);
       struct evbuffer *input;
       int res = 0;
       short what = BEV_EVENT_READING;
       ev_ssize_t howmuch = -1, readmax=-1;

       bufferevent_incref_and_lock_(bufev);

       if (event == EV_TIMEOUT) {
               /* Note that we only check for event==EV_TIMEOUT. If
                * event==EV_TIMEOUT|EV_READ, we can safely ignore the
                * timeout, since a read has occurred */
               what |= BEV_EVENT_TIMEOUT;
               goto error;
       }

       input = bufev->input;

       /*
        * If we have a high watermark configured then we don't want to
        * read more data than would make us reach the watermark.
        */
       if (bufev->wm_read.high != 0) {
               howmuch = bufev->wm_read.high - evbuffer_get_length(input);
               /* we somehow lowered the watermark, stop reading */
               if (howmuch <= 0) {
                       bufferevent_wm_suspend_read(bufev);
                       goto done;
               }
       }
       readmax = bufferevent_get_read_max_(bufev_p);
       if (howmuch < 0 || howmuch > readmax) /* The use of -1 for "unlimited"
                                              * uglifies this code. XXXX */
               howmuch = readmax;
       if (bufev_p->read_suspended)
               goto done;

       evbuffer_unfreeze(input, 0);
       res = evbuffer_read(input, fd, (int)howmuch); /* XXXX evbuffer_read would do better to take and return ev_ssize_t */
       evbuffer_freeze(input, 0);

       if (res == -1) {
               int err = evutil_socket_geterror(fd);
               if (EVUTIL_ERR_RW_RETRIABLE(err))
                       goto reschedule;
               if (EVUTIL_ERR_CONNECT_REFUSED(err)) {
                       bufev_p->connection_refused = 1;
                       goto done;
               }
               /* error case */
               what |= BEV_EVENT_ERROR;
       } else if (res == 0) {
               /* eof case */
               what |= BEV_EVENT_EOF;
       }

       if (res <= 0)
               goto error;

       bufferevent_decrement_read_buckets_(bufev_p, res);

       /* Invoke the user callback - must always be called last */
       bufferevent_trigger_nolock_(bufev, EV_READ, 0);

       goto done;

reschedule:
       goto done;

error:
       bufferevent_disable(bufev, EV_READ);
       bufferevent_run_eventcb_(bufev, what, 0);

done:
       bufferevent_decref_and_unlock_(bufev);
}

static void
bufferevent_writecb(evutil_socket_t fd, short event, void *arg)
{
       struct bufferevent *bufev = arg;
       struct bufferevent_private *bufev_p = BEV_UPCAST(bufev);
       int res = 0;
       short what = BEV_EVENT_WRITING;
       int connected = 0;
       ev_ssize_t atmost = -1;

       bufferevent_incref_and_lock_(bufev);

       if (event == EV_TIMEOUT) {
               /* Note that we only check for event==EV_TIMEOUT. If
                * event==EV_TIMEOUT|EV_WRITE, we can safely ignore the
                * timeout, since a read has occurred */
               what |= BEV_EVENT_TIMEOUT;
               goto error;
       }
       if (bufev_p->connecting) {
               int c = evutil_socket_finished_connecting_(fd);
               /* we need to fake the error if the connection was refused
                * immediately - usually connection to localhost on BSD */
               if (bufev_p->connection_refused) {
                       bufev_p->connection_refused = 0;
                       c = -1;
               }

               if (c == 0)
                       goto done;

               bufev_p->connecting = 0;
               if (c < 0) {
                       event_del(&bufev->ev_write);
                       event_del(&bufev->ev_read);
                       bufferevent_run_eventcb_(bufev, BEV_EVENT_ERROR, 0);
                       goto done;
               } else {
                       connected = 1;
                       bufferevent_socket_set_conn_address_fd_(bufev, fd);
#ifdef _WIN32
                       if (BEV_IS_ASYNC(bufev)) {
                               event_del(&bufev->ev_write);
                               bufferevent_async_set_connected_(bufev);
                               bufferevent_run_eventcb_(bufev,
                                               BEV_EVENT_CONNECTED, 0);
                               goto done;
                       }
#endif
                       bufferevent_run_eventcb_(bufev,
                                       BEV_EVENT_CONNECTED, 0);
                       if (!(bufev->enabled & EV_WRITE) ||
                           bufev_p->write_suspended) {
                               event_del(&bufev->ev_write);
                               goto done;
                       }
               }
       }

       atmost = bufferevent_get_write_max_(bufev_p);

       if (bufev_p->write_suspended)
               goto done;

       if (evbuffer_get_length(bufev->output)) {
               evbuffer_unfreeze(bufev->output, 1);
               res = evbuffer_write_atmost(bufev->output, fd, atmost);
               evbuffer_freeze(bufev->output, 1);
               if (res == -1) {
                       int err = evutil_socket_geterror(fd);
                       if (EVUTIL_ERR_RW_RETRIABLE(err))
                               goto reschedule;
                       what |= BEV_EVENT_ERROR;
               } else if (res == 0) {
                       /* eof case
                          XXXX Actually, a 0 on write doesn't indicate
                          an EOF. An ECONNRESET might be more typical.
                        */
                       what |= BEV_EVENT_EOF;
               }
               if (res <= 0)
                       goto error;

               bufferevent_decrement_write_buckets_(bufev_p, res);
       }

       if (evbuffer_get_length(bufev->output) == 0) {
               event_del(&bufev->ev_write);
       }

       /*
        * Invoke the user callback if our buffer is drained or below the
        * low watermark.
        */
       if (res || !connected) {
               bufferevent_trigger_nolock_(bufev, EV_WRITE, 0);
       }

       goto done;

reschedule:
       if (evbuffer_get_length(bufev->output) == 0) {
               event_del(&bufev->ev_write);
       }
       goto done;

error:
       bufferevent_disable(bufev, EV_WRITE);
       bufferevent_run_eventcb_(bufev, what, 0);

done:
       bufferevent_decref_and_unlock_(bufev);
}

struct bufferevent *
bufferevent_socket_new(struct event_base *base, evutil_socket_t fd,
   int options)
{
       struct bufferevent_private *bufev_p;
       struct bufferevent *bufev;

#ifdef _WIN32
       if (base && event_base_get_iocp_(base))
               return bufferevent_async_new_(base, fd, options);
#endif

       if ((bufev_p = mm_calloc(1, sizeof(struct bufferevent_private)))== NULL)
               return NULL;

       if (bufferevent_init_common_(bufev_p, base, &bufferevent_ops_socket,
                                   options) < 0) {
               mm_free(bufev_p);
               return NULL;
       }
       bufev = &bufev_p->bev;
       evbuffer_set_flags(bufev->output, EVBUFFER_FLAG_DRAINS_TO_FD);

       event_assign(&bufev->ev_read, bufev->ev_base, fd,
           EV_READ|EV_PERSIST|EV_FINALIZE, bufferevent_readcb, bufev);
       event_assign(&bufev->ev_write, bufev->ev_base, fd,
           EV_WRITE|EV_PERSIST|EV_FINALIZE, bufferevent_writecb, bufev);

       evbuffer_add_cb(bufev->output, bufferevent_socket_outbuf_cb, bufev);

       evbuffer_freeze(bufev->input, 0);
       evbuffer_freeze(bufev->output, 1);

       return bufev;
}

int
bufferevent_socket_connect(struct bufferevent *bev,
   const struct sockaddr *sa, int socklen)
{
       struct bufferevent_private *bufev_p = BEV_UPCAST(bev);

       evutil_socket_t fd;
       int r = 0;
       int result=-1;
       int ownfd = 0;

       bufferevent_incref_and_lock_(bev);

       fd = bufferevent_getfd(bev);
       if (fd < 0) {
               if (!sa)
                       goto done;
               fd = evutil_socket_(sa->sa_family,
                   SOCK_STREAM|EVUTIL_SOCK_NONBLOCK, 0);
               if (fd < 0)
                       goto freesock;
               ownfd = 1;
       }
       if (sa) {
#ifdef _WIN32
               if (bufferevent_async_can_connect_(bev)) {
                       bufferevent_setfd(bev, fd);
                       r = bufferevent_async_connect_(bev, fd, sa, socklen);
                       if (r < 0)
                               goto freesock;
                       bufev_p->connecting = 1;
                       result = 0;
                       goto done;
               } else
#endif
               r = evutil_socket_connect_(&fd, sa, socklen);
               if (r < 0)
                       goto freesock;
       }
#ifdef _WIN32
       /* ConnectEx() isn't always around, even when IOCP is enabled.
        * Here, we borrow the socket object's write handler to fall back
        * on a non-blocking connect() when ConnectEx() is unavailable. */
       if (BEV_IS_ASYNC(bev)) {
               event_assign(&bev->ev_write, bev->ev_base, fd,
                   EV_WRITE|EV_PERSIST|EV_FINALIZE, bufferevent_writecb, bev);
       }
#endif
       bufferevent_setfd(bev, fd);
       if (r == 0) {
               if (! be_socket_enable(bev, EV_WRITE)) {
                       bufev_p->connecting = 1;
                       result = 0;
                       goto done;
               }
       } else if (r == 1) {
               /* The connect succeeded already. How very BSD of it. */
               result = 0;
               bufev_p->connecting = 1;
               bufferevent_trigger_nolock_(bev, EV_WRITE, BEV_OPT_DEFER_CALLBACKS);
       } else {
               /* The connect failed already.  How very BSD of it. */
               result = 0;
               bufferevent_run_eventcb_(bev, BEV_EVENT_ERROR, BEV_OPT_DEFER_CALLBACKS);
               bufferevent_disable(bev, EV_WRITE|EV_READ);
       }

       goto done;

freesock:
       if (ownfd)
               evutil_closesocket(fd);
done:
       bufferevent_decref_and_unlock_(bev);
       return result;
}

static void
bufferevent_connect_getaddrinfo_cb(int result, struct evutil_addrinfo *ai,
   void *arg)
{
       struct bufferevent *bev = arg;
       struct bufferevent_private *bev_p = BEV_UPCAST(bev);
       int r;
       BEV_LOCK(bev);

       bufferevent_unsuspend_write_(bev, BEV_SUSPEND_LOOKUP);
       bufferevent_unsuspend_read_(bev, BEV_SUSPEND_LOOKUP);

       bev_p->dns_request = NULL;

       if (result == EVUTIL_EAI_CANCEL) {
               bev_p->dns_error = result;
               bufferevent_decref_and_unlock_(bev);
               return;
       }
       if (result != 0) {
               bev_p->dns_error = result;
               bufferevent_run_eventcb_(bev, BEV_EVENT_ERROR, 0);
               bufferevent_decref_and_unlock_(bev);
               if (ai)
                       evutil_freeaddrinfo(ai);
               return;
       }

       /* XXX use the other addrinfos? */
       bufferevent_socket_set_conn_address_(bev, ai->ai_addr, (int)ai->ai_addrlen);
       r = bufferevent_socket_connect(bev, ai->ai_addr, (int)ai->ai_addrlen);
       if (r < 0)
               bufferevent_run_eventcb_(bev, BEV_EVENT_ERROR, 0);
       bufferevent_decref_and_unlock_(bev);
       evutil_freeaddrinfo(ai);
}

int
bufferevent_socket_connect_hostname(struct bufferevent *bev,
   struct evdns_base *evdns_base, int family, const char *hostname, int port)
{
       char portbuf[10];
       struct evutil_addrinfo hint;
       struct bufferevent_private *bev_p = BEV_UPCAST(bev);

       if (family != AF_INET && family != AF_INET6 && family != AF_UNSPEC)
               return -1;
       if (port < 1 || port > 65535)
               return -1;

       memset(&hint, 0, sizeof(hint));
       hint.ai_family = family;
       hint.ai_protocol = IPPROTO_TCP;
       hint.ai_socktype = SOCK_STREAM;

       evutil_snprintf(portbuf, sizeof(portbuf), "%d", port);

       BEV_LOCK(bev);
       bev_p->dns_error = 0;

       bufferevent_suspend_write_(bev, BEV_SUSPEND_LOOKUP);
       bufferevent_suspend_read_(bev, BEV_SUSPEND_LOOKUP);

       bufferevent_incref_(bev);
       bev_p->dns_request = evutil_getaddrinfo_async_(evdns_base, hostname,
           portbuf, &hint, bufferevent_connect_getaddrinfo_cb, bev);
       BEV_UNLOCK(bev);

       return 0;
}

int
bufferevent_socket_get_dns_error(struct bufferevent *bev)
{
       int rv;
       struct bufferevent_private *bev_p = BEV_UPCAST(bev);

       BEV_LOCK(bev);
       rv = bev_p->dns_error;
       BEV_UNLOCK(bev);

       return rv;
}

/*
* Create a new buffered event object.
*
* The read callback is invoked whenever we read new data.
* The write callback is invoked whenever the output buffer is drained.
* The error callback is invoked on a write/read error or on EOF.
*
* Both read and write callbacks maybe NULL.  The error callback is not
* allowed to be NULL and have to be provided always.
*/

struct bufferevent *
bufferevent_new(evutil_socket_t fd,
   bufferevent_data_cb readcb, bufferevent_data_cb writecb,
   bufferevent_event_cb eventcb, void *cbarg)
{
       struct bufferevent *bufev;

       if (!(bufev = bufferevent_socket_new(NULL, fd, 0)))
               return NULL;

       bufferevent_setcb(bufev, readcb, writecb, eventcb, cbarg);

       return bufev;
}


static int
be_socket_enable(struct bufferevent *bufev, short event)
{
       if (event & EV_READ &&
           bufferevent_add_event_(&bufev->ev_read, &bufev->timeout_read) == -1)
                       return -1;
       if (event & EV_WRITE &&
           bufferevent_add_event_(&bufev->ev_write, &bufev->timeout_write) == -1)
                       return -1;
       return 0;
}

static int
be_socket_disable(struct bufferevent *bufev, short event)
{
       struct bufferevent_private *bufev_p = BEV_UPCAST(bufev);
       if (event & EV_READ) {
               if (event_del(&bufev->ev_read) == -1)
                       return -1;
       }
       /* Don't actually disable the write if we are trying to connect. */
       if ((event & EV_WRITE) && ! bufev_p->connecting) {
               if (event_del(&bufev->ev_write) == -1)
                       return -1;
       }
       return 0;
}

static void
be_socket_destruct(struct bufferevent *bufev)
{
       struct bufferevent_private *bufev_p = BEV_UPCAST(bufev);
       evutil_socket_t fd;
       EVUTIL_ASSERT(BEV_IS_SOCKET(bufev));

       fd = event_get_fd(&bufev->ev_read);

       if ((bufev_p->options & BEV_OPT_CLOSE_ON_FREE) && fd >= 0)
               EVUTIL_CLOSESOCKET(fd);

       evutil_getaddrinfo_cancel_async_(bufev_p->dns_request);
}

static int
be_socket_flush(struct bufferevent *bev, short iotype,
   enum bufferevent_flush_mode mode)
{
       return 0;
}


static void
be_socket_setfd(struct bufferevent *bufev, evutil_socket_t fd)
{
       struct bufferevent_private *bufev_p = BEV_UPCAST(bufev);

       BEV_LOCK(bufev);
       EVUTIL_ASSERT(BEV_IS_SOCKET(bufev));

       event_del(&bufev->ev_read);
       event_del(&bufev->ev_write);

       evbuffer_unfreeze(bufev->input, 0);
       evbuffer_unfreeze(bufev->output, 1);

       event_assign(&bufev->ev_read, bufev->ev_base, fd,
           EV_READ|EV_PERSIST|EV_FINALIZE, bufferevent_readcb, bufev);
       event_assign(&bufev->ev_write, bufev->ev_base, fd,
           EV_WRITE|EV_PERSIST|EV_FINALIZE, bufferevent_writecb, bufev);

       if (fd >= 0)
               bufferevent_enable(bufev, bufev->enabled);

       evutil_getaddrinfo_cancel_async_(bufev_p->dns_request);

       BEV_UNLOCK(bufev);
}

/* XXXX Should non-socket bufferevents support this? */
int
bufferevent_priority_set(struct bufferevent *bufev, int priority)
{
       int r = -1;
       struct bufferevent_private *bufev_p = BEV_UPCAST(bufev);

       BEV_LOCK(bufev);
       if (BEV_IS_ASYNC(bufev) || BEV_IS_FILTER(bufev) || BEV_IS_PAIR(bufev))
               goto done;

       if (event_priority_set(&bufev->ev_read, priority) == -1)
               goto done;
       if (event_priority_set(&bufev->ev_write, priority) == -1)
               goto done;

       event_deferred_cb_set_priority_(&bufev_p->deferred, priority);

       r = 0;
done:
       BEV_UNLOCK(bufev);
       return r;
}

/* XXXX Should non-socket bufferevents support this? */
int
bufferevent_base_set(struct event_base *base, struct bufferevent *bufev)
{
       int res = -1;

       BEV_LOCK(bufev);
       if (!BEV_IS_SOCKET(bufev))
               goto done;

       bufev->ev_base = base;

       res = event_base_set(base, &bufev->ev_read);
       if (res == -1)
               goto done;

       res = event_base_set(base, &bufev->ev_write);
done:
       BEV_UNLOCK(bufev);
       return res;
}

static int
be_socket_ctrl(struct bufferevent *bev, enum bufferevent_ctrl_op op,
   union bufferevent_ctrl_data *data)
{
       switch (op) {
       case BEV_CTRL_SET_FD:
               be_socket_setfd(bev, data->fd);
               return 0;
       case BEV_CTRL_GET_FD:
               data->fd = event_get_fd(&bev->ev_read);
               return 0;
       case BEV_CTRL_GET_UNDERLYING:
       case BEV_CTRL_CANCEL_ALL:
       default:
               return -1;
       }
}