/*
* Copyright (c) 2002 Michael Shalayeff. All rights reserved.
* Copyright (c) 2003 Ryan McBride. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
switch (ifa->ifa_addr->sa_family) {
case AF_INET: {
int count = 0;
struct rtentry *rt;
int hr_otherif, nr_ourif;
/*
* Avoid screwing with the routes if there are other
* carp interfaces which are master and have the same
* address.
*/
if (sc->sc_carpdev != NULL &&
sc->sc_carpdev->if_carp != NULL) {
count = carp_addrcount(
(struct carp_if *)sc->sc_carpdev->if_carp,
ifatoia(ifa), CARP_COUNT_MASTER);
if ((cmd == RTM_ADD && count != 1) ||
(cmd == RTM_DELETE && count != 0))
goto next;
}
/* Remove the existing host route, if any */
rtrequest(RTM_DELETE, ifa->ifa_addr,
ifa->ifa_addr, ifa->ifa_netmask,
RTF_HOST, NULL);
rt = NULL;
(void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
ifa->ifa_netmask, RTF_HOST, &rt);
hr_otherif = (rt && rt->rt_ifp != &sc->sc_if &&
(rt->rt_flags & RTF_CONNECTED));
if (rt != NULL) {
rt_unref(rt);
rt = NULL;
}
if (rtrequest(RTM_ADD, ifa->ifa_addr,
ifa->ifa_addr, ifa->ifa_netmask, 0,
NULL) == 0)
ifa->ifa_flags |= IFA_ROUTE;
}
break;
case RTM_DELETE:
break;
default:
break;
}
if (rt != NULL) {
rt_unref(rt);
rt = NULL;
}
break;
}
#ifdef INET6
case AF_INET6:
if (cmd == RTM_ADD)
in6_ifaddlocal(ifa);
else
in6_ifremlocal(ifa);
break;
#endif /* INET6 */
default:
break;
}
next:
s = pserialize_read_enter();
ifa_release(ifa, &psref);
}
pserialize_read_exit(s);
curlwp_bindx(bound);
KERNEL_UNLOCK_ONE(NULL);
}
/*
* process input packet.
* we have rearranged checks order compared to the rfc,
* but it seems more efficient this way or not possible otherwise.
*/
static void
_carp_proto_input(struct mbuf *m, int hlen, int proto)
{
struct ip *ip = mtod(m, struct ip *);
struct carp_softc *sc = NULL;
struct carp_header *ch;
int iplen, len;
struct ifnet *rcvif;
if (!carp_opts[CARPCTL_ALLOW]) {
m_freem(m);
return;
}
rcvif = m_get_rcvif_NOMPSAFE(m);
/* check if received on a valid carp interface */
if (rcvif->if_type != IFT_CARP) {
CARP_STATINC(CARP_STAT_BADIF);
CARP_LOG(sc, ("packet received on non-carp interface: %s",
rcvif->if_xname));
m_freem(m);
return;
}
/* verify that the IP TTL is 255. */
if (ip->ip_ttl != CARP_DFLTTL) {
CARP_STATINC(CARP_STAT_BADTTL);
CARP_LOG(sc, ("received ttl %d != %d on %s", ip->ip_ttl,
CARP_DFLTTL, rcvif->if_xname));
m_freem(m);
return;
}
/*
* verify that the received packet length is
* equal to the CARP header
*/
iplen = ip->ip_hl << 2;
len = iplen + sizeof(*ch);
if (len > m->m_pkthdr.len) {
CARP_STATINC(CARP_STAT_BADLEN);
CARP_LOG(sc, ("packet too short %d on %s", m->m_pkthdr.len,
rcvif->if_xname));
m_freem(m);
return;
}
if ((m = m_pullup(m, len)) == NULL) {
CARP_STATINC(CARP_STAT_HDROPS);
return;
}
ip = mtod(m, struct ip *);
ch = (struct carp_header *)((char *)ip + iplen);
/* verify the CARP checksum */
m->m_data += iplen;
if (carp_cksum(m, len - iplen)) {
CARP_STATINC(CARP_STAT_BADSUM);
CARP_LOG(sc, ("checksum failed on %s",
rcvif->if_xname));
m_freem(m);
return;
}
m->m_data -= iplen;
carp_proto_input_c(m, ch, AF_INET);
}
/*
 * Validate an incoming CARP packet and pass it to carp_proto_input_c()
 * with address family AF_INET6.
 *
 * NOTE(review): the body reads the IPv6 hop limit (ip6->ip6_hlim), uses
 * carp6_cksum() and passes AF_INET6, yet the function name carries no
 * "6".  It also uses rcvif, sc, ip6, ch and len, none of which are
 * declared inside this block.  This looks like two functions merged
 * together -- confirm against the complete file before relying on it.
 */
void
carp_proto_input(struct mbuf *m, int off, int proto)
{
/* Drop everything unless carp_opts[CARPCTL_ALLOW] is set. */
if (!carp_opts[CARPCTL_ALLOW]) {
m_freem(m);
return;
}
rcvif = m_get_rcvif_NOMPSAFE(m);
/* check if received on a valid carp interface */
if (rcvif->if_type != IFT_CARP) {
CARP_STATINC(CARP_STAT_BADIF);
CARP_LOG(sc, ("packet received on non-carp interface: %s",
rcvif->if_xname));
m_freem(m);
return;
}
/* verify that the IPv6 hop limit equals CARP_DFLTTL */
if (ip6->ip6_hlim != CARP_DFLTTL) {
CARP_STATINC(CARP_STAT_BADTTL);
CARP_LOG(sc, ("received ttl %d != %d on %s", ip6->ip6_hlim,
CARP_DFLTTL, rcvif->if_xname));
m_freem(m);
return;
}
/* verify that we have a complete carp packet */
/* len is only used in the log message below */
len = m->m_len;
M_REGION_GET(ch, struct carp_header *, m, off, sizeof(*ch));
if (ch == NULL) {
/*
 * No m_freem() on this path -- presumably M_REGION_GET releases
 * the mbuf itself when it fails; verify against its definition.
 */
CARP_STATINC(CARP_STAT_BADLEN);
CARP_LOG(sc, ("packet size %u too small", len));
return;
}
/* verify the CARP checksum */
if (carp6_cksum(m, off, sizeof(*ch))) {
CARP_STATINC(CARP_STAT_BADSUM);
CARP_LOG(sc, ("checksum failed, on %s", rcvif->if_xname));
m_freem(m);
return;
}
/* all checks passed: hand the packet and its CARP header on */
carp_proto_input_c(m, ch, AF_INET6);
return;
}
int
carp6_proto_input(struct mbuf **mp, int *offp, int proto)
{
/*
* Check if our own advertisement was duplicated
* from a non simplex interface.
* XXX If there is no address on our physical interface
* there is no way to distinguish our ads from the ones
* another carp host might have sent us.
*/
if ((sc->sc_carpdev->if_flags & IFF_SIMPLEX) == 0) {
struct sockaddr sa;
struct ifaddr *ifa;
int s;
memset(&sa, 0, sizeof(sa));
sa.sa_family = af;
s = pserialize_read_enter();
ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
if (ifa && af == AF_INET) {
struct ip *ip = mtod(m, struct ip *);
if (ip->ip_src.s_addr ==
ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
pserialize_read_exit(s);
m_freem(m);
return;
}
}
#ifdef INET6
if (ifa && af == AF_INET6) {
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
struct in6_addr in6_src, in6_found;
switch (sc->sc_state) {
case INIT:
break;
case MASTER:
/*
* If we receive an advertisement from a backup who's going to
* be more frequent than us, go into BACKUP state.
*/
if (timercmp(&sc_tv, &ch_tv, >) ||
timercmp(&sc_tv, &ch_tv, ==)) {
callout_stop(&sc->sc_ad_tmo);
CARP_LOG(sc, ("MASTER -> BACKUP (more frequent advertisement received)"));
carp_set_state(sc, BACKUP);
carp_setrun(sc, 0);
carp_setroute(sc, RTM_DELETE);
}
break;
case BACKUP:
/*
* If we're pre-empting masters who advertise slower than us,
* and this one claims to be slower, treat him as down.
*/
if (carp_opts[CARPCTL_PREEMPT] && timercmp(&sc_tv, &ch_tv, <)) {
CARP_LOG(sc, ("BACKUP -> MASTER (preempting a slower master)"));
carp_master_down(sc);
break;
}
/*
* If the master is going to advertise at such a low frequency
* that he's guaranteed to time out, we might as well just
* treat him as timed out now.
*/
sc_tv.tv_sec = sc->sc_advbase * 3;
if (timercmp(&sc_tv, &ch_tv, <)) {
CARP_LOG(sc, ("BACKUP -> MASTER (master timed out)"));
carp_master_down(sc);
break;
}
/*
* Otherwise, we reset the counter and wait for the next
* advertisement.
*/
carp_setrun(sc, af);
break;
}
m_freem(m);
return;
}
/*
* Interface side of the CARP implementation.
*/
/* set the source address */
memset(&sa, 0, sizeof(sa));
sa.sa_family = AF_INET6;
_s = pserialize_read_enter();
ifp = sc->sc_carpdev;
ifa = ifaof_ifpforaddr(&sa, ifp);
if (ifa == NULL) { /* This should never happen with IPv6 */
ifp = &sc->sc_if;
ifa = ifaof_ifpforaddr(&sa, ifp);
}
if (ifa == NULL) /* This should never happen with IPv6 */
memset(&ip6->ip6_src, 0, sizeof(struct in6_addr));
else
bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
&ip6->ip6_src, sizeof(struct in6_addr));
pserialize_read_exit(_s);
/* set the multicast destination */
/*
* Broadcast a gratuitous ARP request containing
* the virtual router MAC address for each IP address
* associated with the virtual router.
*/
static void
carp_send_arp(struct carp_softc *sc)
{
struct ifaddr *ifa;
int s, bound;
/*
 * Three-word mixing step, based on bridge_hash() in if_bridge.c.
 *
 * All three arguments are modified in place, so each must be a
 * modifiable lvalue, and each is evaluated several times.
 */
#define mix(a,b,c) \
do { \
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13); \
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 8); \
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13); \
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12); \
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 16); \
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5); \
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3); \
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 10); \
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15); \
} while (0)
/*
 * Hash six bytes at src together with key material from the softc.
 *
 * sc:  supplies sc_hashkey[0] and sc_hashkey[1] as the initial b and c
 *      words and sc_key[0..3] as an additional contribution to c.
 * src: six bytes, src[0]..src[5], are read (presumably a link-layer
 *      address -- confirm with callers).
 *
 * Returns the c word after one mix() round.
 */
static u_int32_t
carp_hash(struct carp_softc *sc, u_char *src)
{
	u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1];

	/*
	 * Widen every byte to u_int32_t before shifting.  A u_char is
	 * promoted to (signed) int, and shifting a value >= 0x80 left by
	 * 24 moves a one into the sign bit, which is undefined behaviour.
	 * The unsigned shift produces the same bits without that hazard.
	 * (sc_key's element type is not visible here; the cast yields the
	 * same result modulo 2^32 whatever it is.)
	 */
	c += (u_int32_t)sc->sc_key[3] << 24;
	c += (u_int32_t)sc->sc_key[2] << 16;
	c += (u_int32_t)sc->sc_key[1] << 8;
	c += sc->sc_key[0];

	b += (u_int32_t)src[5] << 8;
	b += src[4];

	a += (u_int32_t)src[3] << 24;
	a += (u_int32_t)src[2] << 16;
	a += (u_int32_t)src[1] << 8;
	a += src[0];

	mix(a, b, c);
	return (c);
}
static int
carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
{
struct carp_softc *vh;
struct ifaddr *ifa;
int count = 0;
if (carp_opts[CARPCTL_ARPBALANCE]) {
/*
* We use the source ip to decide which virtual host should
* handle the request. If we're master of that virtual host,
* then we respond, otherwise, just drop the arp packet on
* the floor.
*/
/* Count the eligible carp interfaces with this address */
if (*count == 0)
*count = carp_addrcount(
(struct carp_if *)ia->ia_ifp->if_carpdev->if_carp,
ia, CARP_COUNT_RUNNING);
/* This should never happen, but... */
if (*count == 0)
return (0);
if (carp_hash(sc, src) % *count == index - 1 &&
sc->sc_state == MASTER) {
return (1);
}
} else {
if (sc->sc_state == MASTER)
return (1);
}
/*
* When in backup state, af indicates whether to reset the master down timer
* for v4 or v6. If it's set to zero, reset the ones which are already pending.
*/
static void
carp_setrun(struct carp_softc *sc, sa_family_t af)
{
struct timeval tv;
static int
carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
{
struct ifnet *ifp = sc->sc_carpdev;
struct in_ifaddr *ia, *ia_if;
int error = 0;
int s;
if (sin->sin_addr.s_addr == 0) {
if (!(sc->sc_if.if_flags & IFF_UP))
carp_set_state(sc, INIT);
if (sc->sc_naddrs)
sc->sc_if.if_flags |= IFF_UP;
carp_setrun(sc, 0);
return (0);
}
/* we have to do this by hand to ensure we don't match on ourselves */
ia_if = NULL;
s = pserialize_read_enter();
IN_ADDRLIST_READER_FOREACH(ia) {
/* and, yeah, we need a multicast-capable iface too */
if (ia->ia_ifp != &sc->sc_if &&
ia->ia_ifp->if_type != IFT_CARP &&
(ia->ia_ifp->if_flags & IFF_MULTICAST) &&
(sin->sin_addr.s_addr & ia->ia_subnetmask) ==
ia->ia_subnet) {
if (!ia_if)
ia_if = ia;
}
}
if (ia_if) {
ia = ia_if;
if (ifp) {
if (ifp != ia->ia_ifp)
return (EADDRNOTAVAIL);
} else {
/* FIXME NOMPSAFE */
ifp = ia->ia_ifp;
}
}
pserialize_read_exit(s);
if ((error = carp_set_ifp(sc, ifp)))
return (error);
if (sc->sc_carpdev == NULL)
return (EADDRNOTAVAIL);
sc->sc_naddrs++;
if (sc->sc_carpdev != NULL)
sc->sc_if.if_flags |= IFF_UP;
carp_set_state(sc, INIT);
carp_setrun(sc, 0);
/*
* Hook if_addrhooks so that we get a callback after in_ifinit has run,
* to correct any inappropriate routes that it inserted.
*/
if (sc->ah_cookie == 0) {
/* XXX link address hook */
}
#ifdef INET6
static int
carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
{
struct ifnet *ifp = sc->sc_carpdev;
struct in6_ifaddr *ia, *ia_if;
int error = 0;
int s;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
if (!(sc->sc_if.if_flags & IFF_UP))
carp_set_state(sc, INIT);
if (sc->sc_naddrs6)
sc->sc_if.if_flags |= IFF_UP;
carp_setrun(sc, 0);
return (0);
}
/* we have to do this by hand to ensure we don't match on ourselves */
ia_if = NULL;
s = pserialize_read_enter();
IN6_ADDRLIST_READER_FOREACH(ia) {
int i;
for (i = 0; i < 4; i++) {
if ((sin6->sin6_addr.s6_addr32[i] &
ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
(ia->ia_addr.sin6_addr.s6_addr32[i] &
ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
break;
}
/* and, yeah, we need a multicast-capable iface too */
if (ia->ia_ifp != &sc->sc_if &&
ia->ia_ifp->if_type != IFT_CARP &&
(ia->ia_ifp->if_flags & IFF_MULTICAST) &&
(i == 4)) {
if (!ia_if)
ia_if = ia;
}
}
pserialize_read_exit(s);
if (ia_if) {
ia = ia_if;
if (sc->sc_carpdev) {
if (sc->sc_carpdev != ia->ia_ifp)
return (EADDRNOTAVAIL);
} else {
ifp = ia->ia_ifp;
}
}
if ((error = carp_set_ifp(sc, ifp)))
return (error);
if (sc->sc_carpdev == NULL)
return (EADDRNOTAVAIL);
case SIOCADDMULTI:
error = carp_ether_addmulti(sc, ifr);
break;
case SIOCDELMULTI:
error = carp_ether_delmulti(sc, ifr);
break;
case SIOCSIFCAP:
if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
error = 0;
break;
default:
error = ether_ioctl(ifp, cmd, data);
}
carp_hmac_prepare(sc);
return (error);
}
/*
 * Output-start routine for the carp interface.  It is never expected to
 * be called; when built with DEBUG it prints the interface name so that
 * an unexpected call is noticed, otherwise it does nothing.
 */
static void
carp_start(struct ifnet *ifp)
{
#if defined(DEBUG)
	printf("%s: start called\n", ifp->if_xname);
#endif /* DEBUG */
}
ifp = sc->sc_carpdev;
if (ifp == NULL)
return (EINVAL);
error = ether_addmulti(sa, &sc->sc_ac);
if (error != ENETRESET)
return (error);
/*
* This is a new multicast address. We have to tell the parent
* about it. Also, remember this multicast address so that
* we can delete it on unconfigure.
*/
mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT);
if (mc == NULL) {
error = ENOMEM;
goto alloc_failed;
}
/*
* As ether_addmulti() returned ENETRESET, the following two
* statements shouldn't fail.
*/
(void)ether_multiaddr(sa, addrlo, addrhi);
ifp = sc->sc_carpdev;
if (ifp == NULL)
return (EINVAL);
/*
* Find a key to look up the carp_mc_entry. We have to do this
* before calling ether_delmulti for obvious reasons.
*/
if ((error = ether_multiaddr(sa, addrlo, addrhi)) != 0)
return (error);
LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
if (mc->mc_enm == enm)
break;
/* We won't delete entries we didn't add */
if (mc == NULL)
return (EINVAL);
error = ether_delmulti(sa, &sc->sc_ac);
if (error != ENETRESET)
return (error);
/* We no longer use this multicast address. Tell parent so. */
error = if_mcast_op(ifp, SIOCDELMULTI, sa);
if (error == 0) {
/* And forget about this address. */
LIST_REMOVE(mc, mc_entries);
free(mc, M_DEVBUF);
} else
(void)ether_addmulti(sa, &sc->sc_ac);
return (error);
}
/*
* Delete any multicast address we have asked to add from parent
* interface. Called when the carp is being unconfigured.
*/
static void
carp_ether_purgemulti(struct carp_softc *sc)
{
struct ifnet *ifp = sc->sc_carpdev; /* Parent. */
struct carp_mc_entry *mc;