/* $NetBSD: ipsec_output.c,v 1.87 2024/07/05 04:31:54 rin Exp $ */
/*
* Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD: sys/netipsec/ipsec_output.c,v 1.3.2.2 2003/03/28 20:32:53 sam Exp $
*/
/*
 * Attach a PACKET_TAG_IPSEC_OUT_DONE tag to the packet to record that
 * outbound IPsec processing has been completed for it.  ip{,6}_output
 * use this tag to tell whether a packet has already been processed.
 *
 * Returns 0 once the tag has been prepended to m.  If no tag could be
 * obtained, stores ENOMEM in *error and returns -1 without touching m.
 */
static int
ipsec_register_done(struct mbuf *m, int *error)
{
	struct m_tag *done_tag;

	/* Zero-length tag, requested with M_NOWAIT. */
	done_tag = m_tag_get(PACKET_TAG_IPSEC_OUT_DONE, 0, M_NOWAIT);
	if (done_tag != NULL) {
		m_tag_prepend(m, done_tag);
		return 0;
	}

	IPSECLOG(LOG_DEBUG, "could not get packet tag\n");
	*error = ENOMEM;
	return -1;
}
/*
 * Hand a packet to the IP output routine for address family af.
 *
 * AF_INET packets go to ip_output() with the fixed flags
 * IP_RAWOUTPUT|IP_NOIPNEWID (the flags argument is not used for them);
 * AF_INET6 packets go to ip6_output() with the caller's flags.  Both
 * calls are given the route obtained from ipsec_rtcache_percpu, whose
 * reference is held only around the call, and the whole sequence is
 * bracketed by KERNEL_LOCK_UNLESS_NET_MPSAFE() and
 * KERNEL_UNLOCK_UNLESS_NET_MPSAFE().
 *
 * Returns the value returned by the output routine, or -1 if af matched
 * none of the address families compiled in.
 */
static int
ipsec_reinject_ipstack(struct mbuf *m, int af, int flags)
{
	struct route *rt;
	int result;

	KASSERT(af == AF_INET || af == AF_INET6);

	KERNEL_LOCK_UNLESS_NET_MPSAFE();
	rt = rtcache_percpu_getref(ipsec_rtcache_percpu);

	switch (af) {
#ifdef INET
	case AF_INET:
		result = ip_output(m, NULL, rt, IP_RAWOUTPUT|IP_NOIPNEWID,
		    NULL, NULL);
		break;
#endif
#ifdef INET6
	case AF_INET6:
		/*
		 * IPv6 header fields are always kept in network byte
		 * order, so no fix-up is needed before output.
		 */
		result = ip6_output(m, NULL, rt, flags, NULL, NULL, NULL);
		break;
#endif
	default:
		/* No output routine was called. */
		result = -1;
		break;
	}

	rtcache_percpu_putref(ipsec_rtcache_percpu);
	KERNEL_UNLOCK_UNLESS_NET_MPSAFE();

	return result;
}
int
ipsec_process_done(struct mbuf *m, const struct ipsecrequest *isr,
struct secasvar *sav, int flags)
{
struct secasindex *saidx;
int error;
#ifdef INET
struct ip *ip;
#endif
#ifdef INET6
struct ip6_hdr *ip6;
#endif
struct mbuf *mo;
struct udphdr *udp = NULL;
int hlen, roff, iphlen;
/*
* Fix the header length, for AH processing.
*/
switch (saidx->dst.sa.sa_family) {
#ifdef INET
case AF_INET:
ip = mtod(m, struct ip *);
ip->ip_len = htons(m->m_pkthdr.len);
/* For IPv4, the UDP checksum does not have to be set. */
if (sav->natt_type != 0)
ip->ip_p = IPPROTO_UDP;
break;
#endif
#ifdef INET6
case AF_INET6:
if (m->m_pkthdr.len < sizeof(struct ip6_hdr)) {
error = ENXIO;
goto bad;
}
if (m->m_pkthdr.len - sizeof(struct ip6_hdr) > IPV6_MAXPACKET) {
/* No jumbogram support. */
error = ENXIO; /*?*/
goto bad;
}
ip6 = mtod(m, struct ip6_hdr *);
ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
/* For IPv6, the UDP checksum should be set. */
if (sav->natt_type != 0) {
ip6->ip6_nxt = IPPROTO_UDP;
ipsec6_udp_cksum(m);
}
break;
#endif
default:
IPSECLOG(LOG_DEBUG, "unknown protocol family %u\n",
saidx->dst.sa.sa_family);
error = ENXIO;
goto bad;
}
key_sa_recordxfer(sav, m);
/*
* If there's another (bundled) SA to apply, do so.
* Note that this puts a burden on the kernel stack size.
* If this is a problem we'll need to introduce a queue
* to set the packet on so we can unwind the stack before
* doing further processing.
*/
if (isr->next) {
IPSEC_STATINC(IPSEC_STAT_OUT_BUNDLESA);
switch (saidx->dst.sa.sa_family) {
#ifdef INET
case AF_INET:
return ipsec4_process_packet(m, isr->next, NULL);
#endif
#ifdef INET6
case AF_INET6:
return ipsec6_process_packet(m, isr->next, flags);
#endif
default:
IPSECLOG(LOG_DEBUG, "unknown protocol family %u\n",
saidx->dst.sa.sa_family);
error = ENXIO;
goto bad;
}
}
/*
* We're done with IPsec processing, mark the packet as processed,
* and transmit it using the appropriate network protocol
* (IPv4/IPv6).
*/
/*
* For NAT-T enabled ipsecif(4), set NAT-T port numbers
* even if the saidx uses transport mode.
*
* See also ipsecif[46]_output().
*/
mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS);
if (mtag) {
u_int16_t *natt_ports;
saidx = isr->saidx;
if (isr->saidx.mode == IPSEC_MODE_TRANSPORT) {
/* Fill in unspecified SA peers only for transport mode */
ipsec_fill_saidx_bymbuf(&saidx, m, isr->saidx.dst.sa.sa_family);
}
return key_lookup_sa_bysaidx(&saidx);
}
/*
* ipsec_nextisr can return:
* - isr == NULL and error != 0 => something went wrong: the packet must be
* discarded
* - isr == NULL and error == 0 => no more rules to apply: IPsec processing
* is done, reinject the packet into the IP stack
* - isr != NULL (error == 0) => we need to apply one rule to the packet
*/
static const struct ipsecrequest *
ipsec_nextisr(struct mbuf *m, const struct ipsecrequest *isr, int af,
int *error, struct secasvar **ret)
{
#define IPSEC_OSTAT(type) \
do { \
switch (isr->saidx.proto) { \
case IPPROTO_ESP: \
ESP_STATINC(ESP_STAT_ ## type); \
break; \
case IPPROTO_AH: \
AH_STATINC(AH_STAT_ ## type); \
break; \
default: \
IPCOMP_STATINC(IPCOMP_STAT_ ## type); \
break; \
} \
} while (/*CONSTCOND*/0)
KASSERTMSG(af == AF_INET || af == AF_INET6,
"invalid address family %u", af);
again:
/*
* Craft an SA index to search for the proper SA. Note that
* we only fill in unspecified SA peers for transport
* mode; for tunnel mode they must already be filled in.
*/
saidx = isr->saidx;
if (isr->saidx.mode == IPSEC_MODE_TRANSPORT) {
/* Fill in unspecified SA peers only for transport mode */
ipsec_fill_saidx_bymbuf(&saidx, m, af);
}
/*
* Lookup SA and validate it.
*/
*error = key_checkrequest(isr, &saidx, &sav);
if (*error != 0) {
/*
* IPsec processing is required, but no SA was found.
* I assume that key_acquire() has been called
* to get/establish the SA. Here I discard
* this packet because it is the responsibility of
* the upper layer to retransmit the packet.
*/
IPSEC_STATINC(IPSEC_STAT_OUT_NOSA);
goto bad;
}
/* sav may be NULL here if we have a USE rule */
if (sav == NULL) {
KASSERTMSG(ipsec_get_reqlevel(isr) == IPSEC_LEVEL_USE,
"no SA found, but required; level %u",
ipsec_get_reqlevel(isr));
isr = isr->next;
/*
* No more rules to apply, return NULL isr and no error.
* It can happen when the last rules are USE rules.
*/
if (isr == NULL) {
*ret = NULL;
*error = 0;
return isr;
}
goto again;
}
/*
* Check system global policy controls.
*/
if ((isr->saidx.proto == IPPROTO_ESP && !esp_enable) ||
(isr->saidx.proto == IPPROTO_AH && !ah_enable) ||
(isr->saidx.proto == IPPROTO_IPCOMP && !ipcomp_enable)) {
IPSECLOG(LOG_DEBUG, "IPsec outbound packet dropped due"
" to policy (check your sysctls)\n");
IPSEC_OSTAT(PDROPS);
*error = EHOSTUNREACH;
KEY_SA_UNREF(&sav);
goto bad;
}
/*
* Sanity check the SA contents for the caller
* before they invoke the xform output method.
*/
KASSERT(sav->tdb_xform != NULL);
*ret = sav;
return isr;
/*
* Check if we need to handle NAT-T fragmentation.
*/
if (isr == isr->sp->req) { /* Check only if called from ipsec4_output */
KASSERT(mtu != NULL);
ip = mtod(m, struct ip *);
if (!(sav->natt_type & UDP_ENCAP_ESPINUDP)) {
goto noneed;
}
if (ntohs(ip->ip_len) <= sav->esp_frag)
goto noneed;
*mtu = sav->esp_frag;
KEY_SA_UNREF(&sav);
return 0;
}
noneed:
dst = &sav->sah->saidx.dst;
/*
* Collect IP_DF state from the outer header.
*/
if (dst->sa.sa_family == AF_INET) {
ip = mtod(m, struct ip *);
/* Honor system-wide control of how to handle IP_DF */
switch (ip4_ipsec_dfbit) {
case 0: /* clear in outer header */
case 1: /* set in outer header */
setdf = ip4_ipsec_dfbit;
break;
default: /* propagate to outer header */
setdf = ip->ip_off;
setdf = ntohs(setdf);
setdf = htons(setdf & IP_DF);
break;
}
} else {
ip = NULL; /* keep compiler happy */
setdf = 0;
}
/* Fix IPv4 header checksum and length */
ip = mtod(m, struct ip *);
ip->ip_len = htons(m->m_pkthdr.len);
ip->ip_sum = 0;
ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
/* Encapsulate the packet */
error = ipip_output(m, sav, &mp);
if (mp == NULL && !error) {
/* Should never happen. */
IPSECLOG(LOG_DEBUG,
"ipip_output returns no mbuf and no error!");
error = EFAULT;
}
if (error) {
if (mp) {
/* XXX: Should never happen! */
m_freem(mp);
}
m = NULL; /* ipip_output() already freed it */
goto unrefsav;
}
m = mp, mp = NULL;
/*
* ipip_output clears IP_DF in the new header. If
* we need to propagate IP_DF from the outer header,
* then we have to do it here.
*
* XXX shouldn't assume what ipip_output does.
*/
if (dst->sa.sa_family == AF_INET && setdf) {
if (m->m_len < sizeof(struct ip) &&
(m = m_pullup(m, sizeof(struct ip))) == NULL) {
error = ENOBUFS;
goto unrefsav;
}
ip = mtod(m, struct ip *);
ip->ip_off |= htons(IP_DF);
}
}
/*
* Dispatch to the appropriate IPsec transform logic. The
* packet will be returned for transmission after crypto
* processing, etc. are completed. For encapsulation we
* bypass this call because of the explicit call done above
* (necessary to deal with IP_DF handling for IPv4).
*
* NB: m & sav are ``passed to caller'' who's responsible for
* reclaiming their resources.
*/
if (sav->tdb_xform->xf_type != XF_IP4) {
if (dst->sa.sa_family == AF_INET) {
ip = mtod(m, struct ip *);
i = ip->ip_hl << 2;
off = offsetof(struct ip, ip_p);
} else {
i = sizeof(struct ip6_hdr);
off = offsetof(struct ip6_hdr, ip6_nxt);
}
error = (*sav->tdb_xform->xf_output)(m, isr, sav, i, off, 0);
} else {
error = ipsec_process_done(m, isr, sav, 0);
}
KEY_SA_UNREF(&sav);
return error;
/*
* chase mbuf chain to find the appropriate place to
* put AH/ESP/IPcomp header.
* IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
*/
while (1) {
switch (nxt) {
case IPPROTO_AH:
case IPPROTO_ESP:
case IPPROTO_IPCOMP:
/*
* We should not skip security header added
* beforehand.
*/
return 0;
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
case IPPROTO_ROUTING:
if (*i + sizeof(ip6e) > m->m_pkthdr.len) {
return EINVAL;
}
/*
* If we see a 2nd destination option header,
* we should stop there.
*/
if (nxt == IPPROTO_DSTOPTS && dstopt)
return 0;
if (nxt == IPPROTO_DSTOPTS) {
/*
* Seen 1st or 2nd destination option.
* Next time we see one, it must be the 2nd.
*/
dstopt = 1;
} else if (nxt == IPPROTO_ROUTING) {
/*
* If we see destination option next
* time, it must be dest2.
*/
dstopt = 2;
}
/* Encapsulate the packet */
error = ipip_output(m, sav, &mp);
if (mp == NULL && !error) {
/* Should never happen. */
IPSECLOG(LOG_DEBUG,
"ipip_output returns no mbuf and no error!");
error = EFAULT;
}
if (error) {
if (mp) {
/* XXX: Should never happen! */
m_freem(mp);
}
m = NULL; /* ipip_output() already freed it */
goto unrefsav;
}
m = mp;
mp = NULL;
}
if (dst->sa.sa_family == AF_INET) {
struct ip *ip;
ip = mtod(m, struct ip *);
i = ip->ip_hl << 2;
off = offsetof(struct ip, ip_p);
} else {
error = compute_ipsec_pos(m, &i, &off);
if (error)
goto unrefsav;
}
error = (*sav->tdb_xform->xf_output)(m, isr, sav, i, off, flags);
KEY_SA_UNREF(&sav);
return error;