/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* When set to 1, IPsec will send packets with the same sequence number.
* This makes it possible to verify that the other side properly detects
* replay attacks.
*/
int ipsec_replay = 0;
/*
* When set to 1, IPsec will send packets with a corrupted HMAC.
* This makes it possible to verify that the other side properly detects
* modified packets.
*/
int ipsec_integrity = 0;
#else
/* Value used when the alternative branch of the enclosing conditional is built. */
int ipsec_debug = 0;
#endif
/* NOTE(review): presumably the per-CPU IPsec statistics counters -- confirm. */
percpu_t *ipsecstat_percpu;
int ip4_ah_offsetmask = 0; /* maybe IP_DF? */
int ip4_ipsec_dfbit = 2; /* DF bit on encap. 0: clear 1: set 2: copy */
/*
* NOTE(review): presumably the default request levels for IPv4 ESP and AH
* in transport ("trans") and tunnel ("net") mode -- confirm against the
* code that reads them.
*/
int ip4_esp_trans_deflev = IPSEC_LEVEL_USE;
int ip4_esp_net_deflev = IPSEC_LEVEL_USE;
int ip4_ah_trans_deflev = IPSEC_LEVEL_USE;
int ip4_ah_net_deflev = IPSEC_LEVEL_USE;
/* Default policy for AF_INET; key_get_default_sp() returns a reference to it. */
struct secpolicy ip4_def_policy;
int ip4_ipsec_ecn = 0; /* ECN ignore(-1)/forbidden(0)/allowed(1) */
/*
* Per-PCB policy caches record the generation they were filled at and are
* dropped once it no longer matches this counter. The counter wraps from
* UINT_MAX back to 1, so 0 is left free to mark an invalidated cache entry.
*/
u_int ipsec_spdgen = 1; /* SPD generation # */
/*
* Statically allocated placeholder policy. Code that releases a policy
* recognises it by address (sp == &ipsec_dummy_sp) and leaves it alone
* instead of marking it dead.
*/
static struct secpolicy ipsec_dummy_sp __read_mostly = {
.state = IPSEC_SPSTATE_ALIVE,
/* With ENTRUST the dummy SP itself is never used. See ipsec_getpolicybysock. */
.policy = IPSEC_POLICY_ENTRUST,
};
/*
* Checking the generation and sp->state and taking a reference to an SP
* must be in a critical section of pserialize. See key_unlink_sp.
*/
s = pserialize_read_enter();
/* SPD table change invalidate all the caches. */
if (ipsec_spdgen != pcbsp->sp_cache[dir].cachegen) {
ipsec_invalpcbcache(pcbsp, dir);
goto out;
}
sp = pcbsp->sp_cache[dir].cachesp;
if (sp == NULL)
goto out;
if (sp->state != IPSEC_SPSTATE_ALIVE) {
sp = NULL;
ipsec_invalpcbcache(pcbsp, dir);
goto out;
}
if ((pcbsp->sp_cacheflags & IPSEC_PCBSP_CONNECTED) == 0) {
/* NB: assume ipsec_setspidx never sleep */
if (ipsec_setspidx(m, &spidx, dir, 1) != 0) {
sp = NULL;
goto out;
}
/*
* We have to make an exact match here since the cached rule
* might have lower priority than a rule that would otherwise
* have matched the packet.
*/
if (memcmp(&pcbsp->sp_cache[dir].cacheidx, &spidx,
sizeof(spidx))) {
sp = NULL;
goto out;
}
} else {
/*
* The pcb is connected, and the L4 code is sure that:
* - outgoing side uses inp_[lf]addr
* - incoming side looks up policy after inpcb lookup
* and the address pair is known to be stable. We do not need
* to generate spidx again, nor check the address match again.
*
* For IPv4/v6 SOCK_STREAM sockets, this assumption holds
* and there are calls to ipsec_pcbconn() from inpcb_connect().
*/
}
pcbsp->sp_cache[dir].cachesp = NULL;
pcbsp->sp_cache[dir].cachehint = IPSEC_PCBHINT_UNKNOWN;
if (ipsec_setspidx(m, &pcbsp->sp_cache[dir].cacheidx, dir, 1) != 0) {
return EINVAL;
}
pcbsp->sp_cache[dir].cachesp = sp;
if (pcbsp->sp_cache[dir].cachesp) {
/*
* If the PCB is connected, we can remember a hint to
* possibly short-circuit IPsec processing in other places.
*/
if (pcbsp->sp_cacheflags & IPSEC_PCBSP_CONNECTED) {
switch (pcbsp->sp_cache[dir].cachesp->policy) {
case IPSEC_POLICY_NONE:
case IPSEC_POLICY_BYPASS:
pcbsp->sp_cache[dir].cachehint =
IPSEC_PCBHINT_NO;
break;
default:
pcbsp->sp_cache[dir].cachehint =
IPSEC_PCBHINT_YES;
}
}
}
pcbsp->sp_cache[dir].cachegen = ipsec_spdgen;
return 0;
}
/*
 * Drop the cached policy of a PCB.
 *
 * pcbsp: per-PCB policy state; its owning inpcb must be locked.
 * dir:   direction whose cache slot is reset, or IPSEC_DIR_ANY to reset
 *        every slot from IPSEC_DIR_INBOUND through IPSEC_DIR_OUTBOUND.
 *
 * Always returns 0.
 */
static int
ipsec_invalpcbcache(struct inpcbpolicy *pcbsp, int dir)
{
	int slot;

	KASSERT(inp_locked(pcbsp->sp_inp));

	for (slot = IPSEC_DIR_INBOUND; slot <= IPSEC_DIR_OUTBOUND; slot++) {
		if (dir == IPSEC_DIR_ANY || slot == dir) {
			/* Forget the index first, then the policy and its metadata. */
			memset(&pcbsp->sp_cache[slot].cacheidx, 0,
			    sizeof(pcbsp->sp_cache[slot].cacheidx));
			pcbsp->sp_cache[slot].cachegen = 0;
			pcbsp->sp_cache[slot].cachehint = IPSEC_PCBHINT_UNKNOWN;
			pcbsp->sp_cache[slot].cachesp = NULL;
		}
	}

	return 0;
}
if (ipsec_spdgen == UINT_MAX)
ipsec_spdgen = 1;
else
ipsec_spdgen++;
}
/*
* Return a held reference to the default SP.
*/
static struct secpolicy *
key_get_default_sp(int af, const char *where, int tag)
{
struct secpolicy *sp;
KEYDEBUG_PRINTF(KEYDEBUG_IPSEC_STAMP, "DP from %s:%u\n", where, tag);
switch(af) {
case AF_INET:
sp = &ip4_def_policy;
break;
#ifdef INET6
case AF_INET6:
sp = &ip6_def_policy;
break;
#endif
default:
KEYDEBUG_PRINTF(KEYDEBUG_IPSEC_STAMP,
"unexpected protocol family %u\n", af);
return NULL;
}
/*
* For OUTBOUND packet having a socket. Searching SPD for packet,
* and return a pointer to SP.
* OUT: NULL: no appropriate SP found, the following value is set to error.
* 0 : bypass
* EACCES : discard packet.
* ENOENT : ipsec_acquire() in progress, maybe.
* others : error occurred.
* others: a pointer to SP
*
* NOTE: IPv6 mapped address concern is implemented here.
*/
static struct secpolicy *
ipsec_getpolicybysock(struct mbuf *m, u_int dir, struct inpcb *inp,
int *error)
{
struct inpcbpolicy *pcbsp = NULL;
struct secpolicy *currsp = NULL; /* policy on socket */
struct secpolicy *sp;
int af;
/* XXX FIXME inpcb vs socket*/
af = inp->inp_af;
KASSERTMSG(af == AF_INET || af == AF_INET6,
"unexpected protocol family %u", af);
KASSERT(inp->inp_sp != NULL);
/* If we have a cached entry, and if it is still valid, use it. */
IPSEC_STATINC(IPSEC_STAT_SPDCACHELOOKUP);
currsp = ipsec_checkpcbcache(m, inp->inp_sp, dir);
if (currsp) {
*error = 0;
return currsp;
}
IPSEC_STATINC(IPSEC_STAT_SPDCACHEMISS);
switch (af) {
case AF_INET:
#if defined(INET6)
case AF_INET6:
#endif
*error = ipsec_setspidx_inpcb(m, inp);
pcbsp = inp->inp_sp;
break;
default:
*error = EPFNOSUPPORT;
break;
}
if (*error)
return NULL;
if (pcbsp->priv) { /* when privileged socket */
switch (currsp->policy) {
case IPSEC_POLICY_BYPASS:
case IPSEC_POLICY_IPSEC:
KEY_SP_REF(currsp);
sp = currsp;
break;
case IPSEC_POLICY_ENTRUST:
/* look for a policy in SPD */
if (key_havesp(dir))
sp = KEY_LOOKUP_SP_BYSPIDX(&currsp->spidx, dir);
else
sp = NULL;
if (sp == NULL) /* no SP found */
sp = KEY_GET_DEFAULT_SP(af);
break;
default:
IPSECLOG(LOG_ERR, "Invalid policy for PCB %d\n",
currsp->policy);
*error = EINVAL;
return NULL;
}
} else { /* unpriv, SPD has policy */
if (key_havesp(dir))
sp = KEY_LOOKUP_SP_BYSPIDX(&currsp->spidx, dir);
else
sp = NULL;
if (sp == NULL) { /* no SP found */
switch (currsp->policy) {
case IPSEC_POLICY_BYPASS:
IPSECLOG(LOG_ERR, "Illegal policy for "
"non-privileged defined %d\n",
currsp->policy);
*error = EINVAL;
return NULL;
case IPSEC_POLICY_ENTRUST:
sp = KEY_GET_DEFAULT_SP(af);
break;
case IPSEC_POLICY_IPSEC:
KEY_SP_REF(currsp);
sp = currsp;
break;
/*
* For FORWARDING packet or OUTBOUND without a socket. Searching SPD for packet,
* and return a pointer to SP.
* OUT: positive: a pointer to the entry for security policy leaf matched.
* NULL: no appropriate SP found, the following value is set to error.
* 0 : bypass
* EACCES : discard packet.
* ENOENT : ipsec_acquire() in progress, maybe.
* others : error occurred.
*/
static struct secpolicy *
ipsec_getpolicybyaddr(struct mbuf *m, u_int dir, int flag, int *error)
{
struct secpolicyindex spidx;
struct secpolicy *sp;
/* Make an index to look for a policy. */
*error = ipsec_setspidx(m, &spidx, dir, 1);
if (*error != 0) {
IPSECLOG(LOG_DEBUG, "setpidx failed, dir %u flag %u\n", dir, flag);
memset(&spidx, 0, sizeof(spidx));
return NULL;
}
spidx.dir = dir;
if (key_havesp(dir)) {
sp = KEY_LOOKUP_SP_BYSPIDX(&spidx, dir);
}
if (sp == NULL) {
/* no SP found, use system default */
sp = KEY_GET_DEFAULT_SP(spidx.dst.sa.sa_family);
}
/*
 * Outbound IPv4 hook: look up the security policy for a packet and, if
 * one applies, run the packet through IPsec processing.
 *
 * m:          outgoing packet.
 * inp:        PCB the packet belongs to, or NULL.
 * flags:      passed through to ipsec_checkpolicy().
 * mtu:        set, together with *natt_frag, when
 *             ipsec4_process_packet() succeeds and reports a non-zero MTU.
 * natt_frag:  see mtu.
 * done:       set to true once the packet has been freed here or handed
 *             to ipsec4_process_packet() without the MTU case applying.
 * count_drop: set to true when the packet is discarded by policy.
 *
 * Returns 0 or an errno value.  -EINVAL from the policy check and ENOENT
 * from packet processing are both reported as 0.
 */
int
ipsec4_output(struct mbuf *m, struct inpcb *inp, int flags,
    u_long *mtu, bool *natt_frag, bool *done, bool *count_drop)
{
	struct secpolicy *sp;
	u_long frag_mtu = 0;
	int error;

	/*
	 * A packet that is resubmitted to ip_output after AH/ESP/etc.
	 * processing carries a tag; it skips the lookup and all policy
	 * checking.  Untagged packets go through ipsec_checkpolicy().
	 */
	if (ipsec_outdone(m))
		return 0;

	if (inp != NULL && ipsec_pcb_skip_ipsec(inp->inp_sp, IPSEC_DIR_OUTBOUND))
		return 0;

	/*
	 * Possible outcomes:
	 *	sp != NULL			apply IPsec policy
	 *	sp == NULL, error == 0		no IPsec handling needed
	 *	sp == NULL, error == -EINVAL	discard packet w/o error
	 *	sp == NULL, error != 0		discard packet, report error
	 */
	sp = ipsec_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags, &error, inp);
	if (sp == NULL && error == 0) {
		/* No IPsec processing for this packet. */
		return 0;
	}
	if (sp == NULL) {
		m_freem(m);
		*done = true;
		*count_drop = true;
		/*
		 * Hack: -EINVAL signals a silent discard, typically
		 * because key management was asked for an SA and the
		 * answer is delayed (e.g. kicked up to IKE).
		 */
		return (error == -EINVAL) ? 0 : error;
	}

	/*
	 * The packet is sent before the normal path would finish
	 * delayed checksums, so complete them now.
	 */
	if ((m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) != 0) {
		in_undefer_cksum_tcpudp(m);
		m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
	}

	error = ipsec4_process_packet(m, sp->req, &frag_mtu);
	KEY_SP_UNREF(&sp);

	if (error == 0 && frag_mtu != 0) {
		/*
		 * NAT-T ESP fragmentation: IPsec processing is not done
		 * now; it happens on each fragment later.
		 */
		*mtu = frag_mtu;
		*natt_frag = true;
		return 0;
	}

	*done = true;

	/*
	 * Preserve KAME behaviour: ENOENT can be returned while an SA
	 * acquire is in progress.  It is not propagated to user level
	 * because it confuses applications.
	 *
	 * XXX this will go away when the SADB is redone.
	 */
	return (error == ENOENT) ? 0 : error;
}
int
ipsec_ip_input_checkpolicy(struct mbuf *m, bool forward)
{
struct secpolicy *sp;
int error;
error = ipsec_in_reject(m, NULL);
if (error) {
return EINVAL;
}
if (!forward || !(m->m_flags & M_CANFASTFWD)) {
return 0;
}
/*
* Peek at the outbound SP for this packet to determine if
* it is a Fast Forward candidate.
*/
sp = ipsec_checkpolicy(m, IPSEC_DIR_OUTBOUND, IP_FORWARDING,
&error, NULL);
if (sp != NULL) {
m->m_flags &= ~M_CANFASTFWD;
KEY_SP_UNREF(&sp);
}
return 0;
}
/*
* If the packet is routed over IPsec tunnel, tell the originator the
* tunnel MTU.
* tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
*
* XXX: Quick hack!!!
*
* XXX: And what if the MTU goes negative?
*/
void
ipsec_mtu(struct mbuf *m, int *destmtu)
{
struct secpolicy *sp;
size_t ipsechdr;
int error;
/*
* configure security policy index (src/dst/proto/sport/dport)
* by looking at the content of mbuf.
* the caller is responsible for error recovery (like clearing up spidx).
*/
static int
ipsec_setspidx(struct mbuf *m, struct secpolicyindex *spidx, int dir,
int needport)
{
struct ip *ip = NULL;
struct ip ipbuf;
u_int v;
int error;
if (m->m_len >= sizeof(*ip)) {
ip = mtod(m, struct ip *);
} else {
m_copydata(m, 0, sizeof(ipbuf), &ipbuf);
ip = &ipbuf;
}
v = ip->ip_v;
switch (v) {
case 4:
error = ipsec4_setspidx_ipaddr(m, spidx);
if (error)
return error;
ipsec4_get_ulp(m, spidx, needport);
return 0;
#ifdef INET6
case 6:
if (m->m_pkthdr.len < sizeof(struct ip6_hdr)) {
KEYDEBUG_PRINTF(KEYDEBUG_IPSEC_DUMP,
"pkthdr.len(%d) < sizeof(struct ip6_hdr), "
"ignored.\n", m->m_pkthdr.len);
return EINVAL;
}
error = ipsec6_setspidx_ipaddr(m, spidx);
if (error)
return error;
ipsec6_get_ulp(m, spidx, needport);
return 0;
#endif
default:
KEYDEBUG_PRINTF(KEYDEBUG_IPSEC_DUMP,
"unknown IP version %u, ignored.\n", v);
return EINVAL;
}
}
static void
ipsec4_get_ulp(struct mbuf *m, struct secpolicyindex *spidx, int needport)
{
u_int8_t nxt;
int off;
KASSERT(m != NULL);
KASSERTMSG(m->m_pkthdr.len >= sizeof(struct ip), "packet too short");
/* NB: ip_input() flips it into host endian XXX need more checking */
if (m->m_len >= sizeof(struct ip)) {
struct ip *ip = mtod(m, struct ip *);
if (ip->ip_off & htons(IP_MF | IP_OFFMASK))
goto done;
off = ip->ip_hl << 2;
nxt = ip->ip_p;
} else {
struct ip ih;
if (sp == &ipsec_dummy_sp) {
; /* It's dummy. No need to free it. */
} else {
/*
* We cannot destroy here because it can be called in
* softint. So mark the SP as DEAD and let the timer
* destroy it. See key_timehandler_spd.
*/
sp->state = IPSEC_SPSTATE_DEAD;
}
}
/*
 * Compare what a security policy demands with what actually happened to
 * the packet.
 *
 * Returns 1 when the packet must be rejected: the policy is DISCARD, or
 * a request at REQUIRE level names ESP or AH and the corresponding mbuf
 * flag (M_DECRYPTED or M_AUTHIPHDR) is not set.  Returns 0 otherwise.
 */
static int
ipsec_sp_reject(const struct secpolicy *sp, const struct mbuf *m)
{
	struct ipsecrequest *req;

	if (KEYDEBUG_ON(KEYDEBUG_IPSEC_DATA)) {
		printf("%s: using SP\n", __func__);
		kdebug_secpolicy(sp);
	}

	/* Policies that are decided without looking at the packet. */
	if (sp->policy == IPSEC_POLICY_DISCARD)
		return 1;
	if (sp->policy == IPSEC_POLICY_BYPASS ||
	    sp->policy == IPSEC_POLICY_NONE)
		return 0;

	/* XXX should compare policy against ipsec header history */
	for (req = sp->req; req != NULL; req = req->next) {
		if (ipsec_get_reqlevel(req) != IPSEC_LEVEL_REQUIRE)
			continue;

		if (req->saidx.proto == IPPROTO_ESP) {
			if (!(m->m_flags & M_DECRYPTED)) {
				KEYDEBUG_PRINTF(KEYDEBUG_IPSEC_DUMP,
				    "ESP m_flags:%x\n", m->m_flags);
				return 1;
			}
		} else if (req->saidx.proto == IPPROTO_AH) {
			if (!(m->m_flags & M_AUTHIPHDR)) {
				KEYDEBUG_PRINTF(KEYDEBUG_IPSEC_DUMP,
				    "AH m_flags:%x\n", m->m_flags);
				return 1;
			}
		}
		/*
		 * IPPROTO_IPCOMP needs no check: the IPComp document says
		 * small packets should not be compressed, so an IPComp
		 * policy is always treated as being at "use" level.
		 */
	}

	return 0;
}
/*
* Check security policy requirements.
*/
int
ipsec_in_reject(struct mbuf *m, struct inpcb *inp)
{
struct secpolicy *sp;
int error;
int result;
if (sp != NULL) {
result = ipsec_sp_reject(sp, m);
if (result)
IPSEC_STATINC(IPSEC_STAT_IN_POLVIO);
KEY_SP_UNREF(&sp);
} else {
result = 0;
}
return result;
}
/*
* Compute the byte size to be occupied by the IPsec header. If it is
* tunneled, it includes the size of outer IP header.
*/
static size_t
ipsec_sp_hdrsiz(const struct secpolicy *sp, const struct mbuf *m)
{
struct ipsecrequest *isr;
size_t siz;
if (KEYDEBUG_ON(KEYDEBUG_IPSEC_DATA)) {
printf("%s: using SP\n", __func__);
kdebug_secpolicy(sp);
}
switch (sp->policy) {
case IPSEC_POLICY_DISCARD:
case IPSEC_POLICY_BYPASS:
case IPSEC_POLICY_NONE:
return 0;
}
/*
* Check the variable replay window.
* ipsec_chkreplay() performs replay check before ICV verification.
* ipsec_updatereplay() updates replay bitmap. This must be called after
* ICV verification (it also performs replay check, which is usually done
* beforehand).
* 0 (zero) is returned if packet disallowed, 1 if packet permitted.
*
* based on RFC 2401.
*/
int
ipsec_chkreplay(u_int32_t seq, const struct secasvar *sav)
{
const struct secreplay *replay;
u_int32_t diff;
int fr;
u_int32_t wsizeb; /* constant: bits of window size */
int frlast; /* constant: last frame */
/* sequence number of 0 is invalid */
if (seq == 0)
return 1;
/* first time */
if (replay->count == 0) {
replay->lastseq = seq;
memset(replay->bitmap, 0, replay->wsize);
(replay->bitmap)[frlast] = 1;
goto ok;
}
if (seq > replay->lastseq) {
/* seq is larger than lastseq. */
diff = seq - replay->lastseq;
/* new larger sequence number */
if (diff < wsizeb) {
/* In window */
/* set bit for this packet */
vshiftl(replay->bitmap, diff, replay->wsize);
(replay->bitmap)[frlast] |= 1;
} else {
/* this packet has a "way larger" sequence number */
memset(replay->bitmap, 0, replay->wsize);
(replay->bitmap)[frlast] = 1;
}
replay->lastseq = seq;
/* larger is good */
} else {
/* seq is equal or less than lastseq. */
diff = replay->lastseq - seq;
/* over range to check, i.e. too old or wrapped */
if (diff >= wsizeb)
return 1;
fr = frlast - diff / 8;
/* this packet already seen ? */
if ((replay->bitmap)[fr] & (1 << (diff % 8)))
return 1;
/* mark as seen */
(replay->bitmap)[fr] |= (1 << (diff % 8));
/* out of order but good */
}
ok:
if (replay->count == ~0) {
char buf[IPSEC_LOGSASTRLEN];
/* set overflow flag */
replay->overflow++;
/* don't increment, no more packets accepted */
if ((sav->flags & SADB_X_EXT_CYCSEQ) == 0)
return 1;
/*
 * Shift a variable-length bit buffer to the left.
 *
 * bitmap[0] holds the most significant byte: bits shifted out of
 * bitmap[0] are lost and zero bits enter at the low end of
 * bitmap[wsize - 1].
 *
 * IN:	bitmap: pointer to the buffer
 *	nbit:	the number of bits to shift.
 *	wsize:	buffer size (bytes).
 *
 * Nothing is done when nbit or wsize is not positive.
 */
static void
vshiftl(unsigned char *bitmap, int nbit, int wsize)
{
	int s, j, i;
	unsigned char over;

	/* An empty buffer has no bitmap[0] to touch. */
	if (wsize <= 0 || nbit <= 0)
		return;

	/* Shift in steps of at most one byte. */
	for (j = 0; j < nbit; j += 8) {
		s = (nbit - j < 8) ? (nbit - j) : 8;
		bitmap[0] <<= s;
		for (i = 1; i < wsize; i++) {
			/* Carry the top s bits of this byte into the previous one. */
			over = (bitmap[i] >> (8 - s));
			bitmap[i] <<= s;
			bitmap[i-1] |= over;
		}
	}
}
/*
 * Return a printable string for the address.
 *
 * sa:   address to format; only the address part is printed.
 * buf:  caller-supplied output buffer.
 * size: size of buf in bytes.
 *
 * Returns buf for AF_INET (and for AF_INET6 when built with INET6).  For
 * any other family a constant string is returned and buf is not written.
 */
const char *
ipsec_address(const union sockaddr_union *sa, char *buf, size_t size)
{
	switch (sa->sa.sa_family) {
	case AF_INET:
		in_print(buf, size, &sa->sin.sin_addr);
		return buf;
	/* Test INET6 the same way as the rest of this file. */
#ifdef INET6
	case AF_INET6:
		in6_print(buf, size, &sa->sin6.sin6_addr);
		return buf;
#endif
	default:
		return "(unknown address family)";
	}
}
/*
* calculate UDP checksum for UDP encapsulated ESP for IPv6.
*
* RFC2460(Internet Protocol, Version 6 Specification) says:
*
* IPv6 receivers MUST discard UDP packets with a zero checksum.
*
* There is more relaxed specification RFC6935(IPv6 and UDP Checksums for
* Tunneled Packets). The document allows zero checksum. It's too
* late to publish, there are a lot of interoperability problems...
*/
void
ipsec6_udp_cksum(struct mbuf *m)
{
struct ip6_hdr *ip6;
uint16_t plen, uh_sum;
int off;
/* must be called after m_pullup() */
KASSERT(m->m_len >= sizeof(struct ip6_hdr));