/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* @(#)COPYRIGHT 1.1 (NRL) 17 January 1995
*
* NRL grants permission for redistribution and use in source and binary
* forms, with or without modification, of the software and documentation
* created at NRL provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgements:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* This product includes software developed at the Information
* Technology Division, US Naval Research Laboratory.
* 4. Neither the name of the NRL nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
* IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* The views and conclusions contained in the software and documentation
* are those of the authors and should not be interpreted as representing
* official policies, either expressed or implied, of the US Naval
* Research Laboratory (NRL).
*/
/*-
* Copyright (c) 1997, 1998, 1999, 2001, 2005, 2006,
* 2011 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Coyote Point Systems, Inc.
* This code is derived from software contributed to The NetBSD Foundation
* by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
* Facility, NASA Ames Research Center.
* This code is derived from software contributed to The NetBSD Foundation
* by Charles M. Hannum.
* This code is derived from software contributed to The NetBSD Foundation
* by Rui Paulo.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
*/
/*
* TODO list for SYN cache stuff:
*
* Find room for a "state" field, which is needed to keep a
* compressed state for TIME_WAIT TCBs. It's been noted already
* that this is fairly important for very high-volume web and
* mail servers, which use a large number of short-lived
* connections.
*/
/*
* We don't estimate RTT with SYNs, so each packet starts with the default
* RTT and each timer step has a fixed timeout value.
*/
/*
* syn_cache_timer_arm: takes a syn cache entry `sc'.
*
* As visible here, the body re-seeds syn_hash1/syn_hash2 when the cache
* is empty, evicts the first entry of a bucket when the per-bucket or
* the global limit has been reached, and then links `sc' onto tp->t_sc
* and onto the tail of the bucket at `scp', incrementing sch_length and
* syn_cache_count.
*
* NOTE(review): `s', `scp', `sc2' and `tp' are used below but are not
* declared in this block, and the panic strings name syn_cache_insert.
* This body presumably belongs to a different function -- confirm
* against the full file before relying on this description.
*/
static inline void
syn_cache_timer_arm(struct syn_cache *sc)
{
/*
* If there are no entries in the hash table, reinitialize
* the hash secrets.
*/
if (syn_cache_count == 0) {
syn_hash1 = cprng_fast32();
syn_hash2 = cprng_fast32();
}
/*
* Make sure that we don't overflow the per-bucket
* limit or the total cache size limit.
*/
/* Interrupt level is raised here and restored by splx(s) at the end. */
s = splsoftnet();
if (scp->sch_length >= tcp_syn_bucket_limit) {
TCP_STATINC(TCP_STAT_SC_BUCKETOVERFLOW);
/*
* The bucket is full. Toss the oldest element in the
* bucket. This will be the first entry in the bucket.
*/
sc2 = TAILQ_FIRST(&scp->sch_bucket);
#ifdef DIAGNOSTIC
/*
* This should never happen; we should always find an
* entry in our bucket.
*/
if (sc2 == NULL)
panic("syn_cache_insert: bucketoverflow: impossible");
#endif
/* Without DIAGNOSTIC, sc2 is passed on without a NULL check. */
syn_cache_rm(sc2);
syn_cache_put(sc2); /* calls pool_put but see spl above */
} else if (syn_cache_count >= tcp_syn_cache_limit) {
struct syn_cache_head *scp2, *sce;
TCP_STATINC(TCP_STAT_SC_OVERFLOWED);
/*
* The cache is full. Toss the oldest entry in the
* first non-empty bucket we can find.
*
* XXX We would really like to toss the oldest
* entry in the cache, but we hope that this
* condition doesn't happen very often.
*/
scp2 = scp;
if (TAILQ_EMPTY(&scp2->sch_bucket)) {
/* sce is one past the last bucket of tcp_syn_cache. */
sce = &tcp_syn_cache[tcp_syn_cache_size];
/*
* Scan forward from the bucket after scp, wrapping to
* tcp_syn_cache[0] at the end of the table, until a
* non-empty bucket is found or scp2 equals scp again.
*/
for (++scp2; scp2 != scp; scp2++) {
if (scp2 >= sce)
scp2 = &tcp_syn_cache[0];
if (! TAILQ_EMPTY(&scp2->sch_bucket))
break;
}
#ifdef DIAGNOSTIC
/*
* This should never happen; we should always find a
* non-empty bucket.
*/
if (scp2 == scp)
panic("syn_cache_insert: cacheoverflow: "
"impossible");
#endif
}
/* Evict the head of whichever bucket was selected above. */
sc2 = TAILQ_FIRST(&scp2->sch_bucket);
syn_cache_rm(sc2);
syn_cache_put(sc2); /* calls pool_put but see spl above */
}
/* Link it from tcpcb entry */
LIST_INSERT_HEAD(&tp->t_sc, sc, sc_tpq);
/* Put it into the bucket. */
TAILQ_INSERT_TAIL(&scp->sch_bucket, sc, sc_bucketq);
scp->sch_length++;
syn_cache_count++;
TCP_STATINC(TCP_STAT_SC_ADDED);
splx(s);
}
/*
* Walk the timer queues, looking for SYN,ACKs that need to be retransmitted.
* If we have retransmitted an entry the maximum number of times, expire
* that entry.
*/
/*
* syn_cache_timer: `arg' is used as the struct syn_cache to examine.
*
* NOTE(review): only the first part of this function is visible here;
* the `free' and `dropit' labels and the closing brace are not in this
* view, so what happens after each goto cannot be described from here.
*/
static void
syn_cache_timer(void *arg)
{
struct syn_cache *sc = arg;
/* Take softnet_lock, then the kernel lock, before touching the entry. */
mutex_enter(softnet_lock);
KERNEL_LOCK(1, NULL);
callout_ack(&sc->sc_timer);
/* Entries flagged SCF_DEAD are counted and sent straight to `free'. */
if (__predict_false(sc->sc_flags & SCF_DEAD)) {
TCP_STATINC(TCP_STAT_SC_DELAYED_FREE);
goto free;
}
if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) {
/* Drop it -- too many retransmissions. */
goto dropit;
}
/*
* Compute the total amount of time this entry has
* been on a queue. If this entry has been on longer
* than the keep alive timer would allow, expire it.
*/
sc->sc_rxttot += sc->sc_rxtcur;
/* The limit is the smaller of tcp_keepinit and TCP_TIMER_MAXTICKS. */
if (sc->sc_rxttot >= MIN(tcp_keepinit, TCP_TIMER_MAXTICKS))
goto dropit;
/*
* Remove the syn cache entries created by the specified tcb entry,
* because it does not make sense to keep them
* (if there's no tcb entry, a syn cache entry will never be used)
*/
void
syn_cache_cleanup(struct tcpcb *tp)
{
struct syn_cache *sc, *nsc;
int s;
#ifdef DIAGNOSTIC
if (sc->sc_tp != tp)
panic("invalid sc_tp in syn_cache_cleanup");
#endif
syn_cache_rm(sc);
syn_cache_put(sc); /* calls pool_put but see spl above */
}
/* just for safety */
LIST_INIT(&tp->t_sc);
splx(s);
}
/*
* Find an entry in the syn cache.
*/
/*
* syn_cache_lookup: takes source and destination socket addresses and
* an out-parameter `headp', and returns a struct syn_cache pointer.
*
* NOTE(review): only the header and local declarations are visible in
* this view; the lookup logic and the closing brace are missing, so the
* hashing and matching rules cannot be described from here.
*/
static struct syn_cache *
syn_cache_lookup(const struct sockaddr *src, const struct sockaddr *dst,
struct syn_cache_head **headp)
{
struct syn_cache *sc;
struct syn_cache_head *scp;
u_int32_t hash;
int s;
/*
* This function gets called when we receive an ACK for a socket in the
* LISTEN state. We look up the connection in the syn cache, and if it's
* there, we pull it out of the cache and turn it into a full-blown
* connection in the SYN-RECEIVED state.
*
* The return values may not be immediately obvious, and their effects
* can be subtle, so here they are:
*
* NULL SYN was not found in cache; caller should drop the
* packet and send an RST.
*
* -1 We were unable to create the new connection, and are
* aborting it. An ACK,RST is being sent to the peer
* (unless we got screwy sequence numbers; see below),
* because the 3-way handshake has been completed. Caller
* should not free the mbuf, since we may be using it. If
* we are not, we will free it.
*
* Otherwise, the return value is a pointer to the new socket
* associated with the connection.
*/
struct socket *
syn_cache_get(struct sockaddr *src, struct sockaddr *dst,
struct tcphdr *th, struct socket *so, struct mbuf *m)
{
/*
* NOTE(review): in this block `tp' is declared but never assigned
* before it is dereferenced, `inp' is only ever set to NULL before
* inp->inp_af is read, `oso' is assigned but not used, and nothing
* visible jumps to the `abort' label. The code that sets these up
* is presumably missing from this view -- confirm against the full
* file before changing anything here.
*/
struct syn_cache *sc;
struct syn_cache_head *scp;
struct inpcb *inp = NULL;
struct tcpcb *tp;
int s;
struct socket *oso;
/* Look the entry up at splsoftnet; a miss returns NULL to the caller. */
s = splsoftnet();
if ((sc = syn_cache_lookup(src, dst, &scp)) == NULL) {
splx(s);
return NULL;
}
/* Remove this cache entry */
syn_cache_rm(sc);
splx(s);
/*
* Ok, create the full blown connection, and set things up
* as they would have been set up if we had created the
* connection when the SYN arrived. If we can't create
* the connection, abort it.
*/
/*
* inp still has the OLD in_pcb stuff, set the
* v6-related flags on the new guy, too. This is
* done particularly for the case where an AF_INET6
* socket is bound only to a port, and a v4 connection
* comes in on that port.
* we also copy the flowinfo from the original pcb
* to the new one.
*/
oso = so;
/* From here on `so' is the socket returned by sonewconn(), or NULL. */
so = sonewconn(so, true);
if (so == NULL)
goto resetandabort;
/* Carry the options recorded in the cache entry over to the tcpcb. */
if ((sc->sc_flags & SCF_SACK_PERMIT) && tcp_do_sack)
tp->t_flags |= TF_WILL_SACK;
if ((sc->sc_flags & SCF_ECN_PERMIT) && tcp_do_ecn)
tp->t_flags |= TF_ECN_PERMIT;
#ifdef TCP_SIGNATURE
if (sc->sc_flags & SCF_SIGNATURE)
tp->t_flags |= TF_SIGNATURE;
#endif
/* Initialize tp->t_ourmss before we deal with the peer's! */
tp->t_ourmss = sc->sc_ourmaxseg;
tcp_mss_from_peer(tp, sc->sc_peermaxseg);
/*
* Initialize the initial congestion window. If we
* had to retransmit the SYN,ACK, we must initialize cwnd
* to 1 segment (i.e. the Loss Window).
*/
if (sc->sc_rxtshift)
tp->snd_cwnd = tp->t_peermss;
else {
/* Use tcp_init_win_local when the foreign address is local. */
int ss = tcp_init_win;
if (inp->inp_af == AF_INET && in_localaddr(in4p_faddr(inp)))
ss = tcp_init_win_local;
#ifdef INET6
else if (inp->inp_af == AF_INET6 && in6_localaddr(&in6p_faddr(inp)))
ss = tcp_init_win_local;
#endif
tp->snd_cwnd = TCP_INITIAL_WINDOW(ss, tp->t_peermss);
}
/*
* This is what would have happened in tcp_output() when
* the SYN,ACK was sent.
*/
tp->snd_up = tp->snd_una;
tp->snd_max = tp->snd_nxt = tp->iss+1;
TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);
/* Only move rcv_adv forward, never backward. */
if (sc->sc_win > 0 && SEQ_GT(tp->rcv_nxt + sc->sc_win, tp->rcv_adv))
tp->rcv_adv = tp->rcv_nxt + sc->sc_win;
tp->last_ack_sent = tp->rcv_nxt;
tp->t_partialacks = -1;
tp->t_dupacks = 0;
TCP_STATINC(TCP_STAT_SC_COMPLETED);
/* The entry is no longer needed; release it at splsoftnet. */
s = splsoftnet();
syn_cache_put(sc);
splx(s);
return so;
resetandabort:
/* Send a TH_RST reply, passing the incoming mbuf and TCP header. */
(void)tcp_respond(NULL, m, m, th, (tcp_seq)0, th->th_ack, TH_RST);
/* Falls through into abort. */
abort:
if (so != NULL) {
(void) soqremque(so, 1);
(void) soabort(so);
/* NOTE(review): presumably soabort() released softnet_lock -- confirm. */
mutex_enter(softnet_lock);
}
s = splsoftnet();
syn_cache_put(sc);
splx(s);
TCP_STATINC(TCP_STAT_SC_ABORTED);
/* Failure is reported with the sentinel pointer value -1, not NULL. */
return ((struct socket *)(-1));
}
/*
* This function is called when we get a RST for a
* non-existent connection, so that we can see if the
* connection is in the syn cache. If it is, zap it.
*/
/*
* NOTE(review): this fragment has no visible function header or
* declarations for `s', `sc', `scp', `src', `dst' and `th'. The
* SCF_UNREACH flag and TCP_STAT_SC_UNREACH counter suggest it is the
* body of an unreachable-notification handler -- confirm against the
* full file.
*/
s = splsoftnet();
/* Nothing to do if there is no matching entry. */
if ((sc = syn_cache_lookup(src, dst, &scp)) == NULL) {
splx(s);
return;
}
/* If the sequence number != sc_iss, then it's a bogus ICMP msg */
if (ntohl(th->th_seq) != sc->sc_iss) {
splx(s);
return;
}
/*
* If we've retransmitted 3 times and this is our second error,
* we remove the entry. Otherwise, we allow it to continue on.
* This prevents us from incorrectly nuking an entry during a
* spurious network outage.
*
* See tcp_notify().
*/
/* First error, or fewer than 3 retransmits: just record SCF_UNREACH. */
if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxtshift < 3) {
sc->sc_flags |= SCF_UNREACH;
splx(s);
return;
}
/* Otherwise unlink the entry, count it, and release it. */
syn_cache_rm(sc);
TCP_STATINC(TCP_STAT_SC_UNREACH);
syn_cache_put(sc); /* calls pool_put but see spl above */
splx(s);
}
/*
* Given a LISTEN socket and an inbound SYN request, add this to the syn
* cache, and send back a segment:
* <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
* to the source.
*
* IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN.
* Doing so would require that we hold onto the data and deliver it
* to the application. However, if we are the target of a SYN-flood
* DoS attack, an attacker could send data which would eventually
* consume all available buffer space if it were ACKed. By not ACKing
* the data, we avoid this DoS scenario.
*/
int
syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
unsigned int toff, struct socket *so, struct mbuf *m, u_char *optp,
int optlen, struct tcp_opt_info *oi)
{
struct tcpcb tb, *tp;
long win;
struct syn_cache *sc;
struct syn_cache_head *scp;
struct mbuf *ipopts;
int s;
tp = sototcpcb(so);
/*
* Initialize some local state.
*/
win = sbspace(&so->so_rcv);
if (win > TCP_MAXWIN)
win = TCP_MAXWIN;
switch (src->sa_family) {
case AF_INET:
/* Remember the IP options, if any. */
ipopts = ip_srcroute(m);
break;
default:
ipopts = NULL;
}
/*
* See if we already have an entry for this connection.
* If we do, resend the SYN,ACK. We do not count this
* as a retransmission (XXX though maybe we should).
*/
if ((sc = syn_cache_lookup(src, dst, &scp)) != NULL) {
TCP_STATINC(TCP_STAT_SC_DUPESYN);
if (ipopts) {
/*
* If we were remembering a previous source route,
* forget it and use the new one we've been given.
*/
if (sc->sc_ipopts)
(void)m_free(sc->sc_ipopts);
sc->sc_ipopts = ipopts;
}
sc->sc_timestamp = tb.ts_recent;
m_freem(m);
if (syn_cache_respond(sc) == 0) {
net_stat_ref_t tcps = TCP_STAT_GETREF();
_NET_STATINC_REF(tcps, TCP_STAT_SNDACKS);
_NET_STATINC_REF(tcps, TCP_STAT_SNDTOTAL);
TCP_STAT_PUTREF();
}
return 1;
}
s = splsoftnet();
sc = pool_get(&syn_cache_pool, PR_NOWAIT);
splx(s);
if (sc == NULL) {
if (ipopts)
(void)m_free(ipopts);
return 0;
}
/*
* Fill in the cache, and put the necessary IP and TCP
* options into the reply.
*/
memset(sc, 0, sizeof(struct syn_cache));
callout_init(&sc->sc_timer, CALLOUT_MPSAFE);
memcpy(&sc->sc_src, src, src->sa_len);
memcpy(&sc->sc_dst, dst, dst->sa_len);
sc->sc_flags = 0;
sc->sc_ipopts = ipopts;
sc->sc_irs = th->th_seq;
switch (src->sa_family) {
case AF_INET:
{
struct sockaddr_in *srcin = (void *)src;
struct sockaddr_in *dstin = (void *)dst;
sc->sc_iss = tcp_new_iss1(&dstin6->sin6_addr,
&srcin6->sin6_addr, dstin6->sin6_port,
srcin6->sin6_port, sizeof(dstin6->sin6_addr));
break;
}
#endif
}
sc->sc_peermaxseg = oi->maxseg;
sc->sc_ourmaxseg = tcp_mss_to_advertise(m->m_flags & M_PKTHDR ?
m_get_rcvif_NOMPSAFE(m) : NULL, sc->sc_src.sa.sa_family);
sc->sc_win = win;
sc->sc_timebase = tcp_now - 1; /* see tcp_newtcpcb() */
sc->sc_timestamp = tb.ts_recent;
if ((tb.t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP)) ==
(TF_REQ_TSTMP|TF_RCVD_TSTMP))
sc->sc_flags |= SCF_TIMESTAMP;
if ((tb.t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
(TF_RCVD_SCALE|TF_REQ_SCALE)) {
sc->sc_requested_s_scale = tb.requested_s_scale;
sc->sc_request_r_scale = 0;
/*
* Pick the smallest possible scaling factor that
* will still allow us to scale up to sb_max.
*
* We do this because there are broken firewalls that
* will corrupt the window scale option, leading to
* the other endpoint believing that our advertised
* window is unscaled. At scale factors larger than
* 5 the unscaled window will drop below 1500 bytes,
* leading to serious problems when traversing these
* broken firewalls.
*
* With the default sbmax of 256K, a scale factor
* of 3 will be chosen by this algorithm. Those who
* choose a larger sbmax should watch out
* for the compatibility problems mentioned above.
*
* RFC1323: The Window field in a SYN (i.e., a <SYN>
* or <SYN,ACK>) segment itself is never scaled.
*/
while (sc->sc_request_r_scale < TCP_MAX_WINSHIFT &&
(TCP_MAXWIN << sc->sc_request_r_scale) < sb_max)
sc->sc_request_r_scale++;
} else {
sc->sc_requested_s_scale = 15;
sc->sc_request_r_scale = 15;
}
if ((tb.t_flags & TF_SACK_PERMIT) && tcp_do_sack)
sc->sc_flags |= SCF_SACK_PERMIT;
if (sc->sc_flags & SCF_SACK_PERMIT) {
/* Let the peer know that we will SACK. */
*optp++ = TCPOPT_SACK_PERMITTED;
*optp++ = TCPOLEN_SACK_PERMITTED;
optlen += TCPOLEN_SACK_PERMITTED;
}
if (sc->sc_flags & SCF_TIMESTAMP) {
while (optlen % 4 != 2) {
optlen += TCPOLEN_NOP;
*optp++ = TCPOPT_NOP;
}
*optp++ = TCPOPT_TIMESTAMP;
*optp++ = TCPOLEN_TIMESTAMP;
u_int32_t *lp = (u_int32_t *)(optp);
/* Form timestamp option as shown in appendix A of RFC 1323. */
*lp++ = htonl(SYN_CACHE_TIMESTAMP(sc));
*lp = htonl(sc->sc_timestamp);
optp += TCPOLEN_TIMESTAMP - 2;
optlen += TCPOLEN_TIMESTAMP;
}
#ifdef TCP_SIGNATURE
if (sc->sc_flags & SCF_SIGNATURE) {
sav = tcp_signature_getsav(m);
if (sav == NULL) {
m_freem(m);
return EPERM;
}
/*
* Terminate and pad TCP options to a 4 byte boundary.
*
* According to RFC793: "The content of the header beyond the
* End-of-Option option must be header padding (i.e., zero)."
* And later: "The padding is composed of zeros."
*/
if (optlen % 4) {
optlen += TCPOLEN_EOL;
*optp++ = TCPOPT_EOL;
}
while (optlen % 4) {
optlen += TCPOLEN_PAD;
*optp++ = TCPOPT_PAD;
}
/* Compute the actual values now that we've added the options. */
tlen = hlen + sizeof(struct tcphdr) + optlen;
m->m_len = m->m_pkthdr.len = tlen;
th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
/*
* Send ECN SYN-ACK setup packet.
* Routes can be asymmetric, so, even if we receive a packet
* with ECE and CWR set, we must not assume no one will block
* the ECE packet we are about to send.
*/
if ((sc->sc_flags & SCF_ECN_PERMIT) && tp &&
SEQ_GEQ(tp->snd_nxt, tp->snd_max)) {
th->th_flags |= TH_ECE;
TCP_STATINC(TCP_STAT_ECN_SHS);
/*
* draft-ietf-tcpm-ecnsyn-00.txt
*
* "[...] a TCP node MAY respond to an ECN-setup
* SYN packet by setting ECT in the responding
* ECN-setup SYN/ACK packet, indicating to routers
* that the SYN/ACK packet is ECN-Capable.
* This allows a congested router along the path
* to mark the packet instead of dropping the
* packet as an indication of congestion."
*
* "[...] There can be a great benefit in setting
* an ECN-capable codepoint in SYN/ACK packets [...]
* Congestion is most likely to occur in
* the server-to-client direction. As a result,
* setting an ECN-capable codepoint in SYN/ACK
* packets can reduce the occurrence of three-second
* retransmit timeouts resulting from the drop
* of SYN/ACK packets."
*
* Page 4 and 6, January 2006.
*/