Apply by doing:
cd /usr/src
patch -p0 < 018_tcp.patch
Rebuild your kernel.
Update the installed headers (run from /usr/src):
make includes
Then rebuild and install sysctl so that it recognizes the new tcp "reasslimit" variable (starting from /usr/src):
cd sbin/sysctl
make depend
make
make install
Index: sys/netinet/tcp_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.125
retrieving revision 1.125.2.2
diff -u -p -r1.125 -r1.125.2.2
--- sys/netinet/tcp_input.c 14 Feb 2003 17:54:46 -0000 1.125
+++ sys/netinet/tcp_input.c 3 Mar 2004 08:40:07 -0000 1.125.2.2
@@ -204,11 +204,23 @@ tcp_reass(tp, th, m, tlen)
* Allocate a new queue entry, before we throw away any data.
* If we can't, just drop the packet. XXX
*/
- tiqe = pool_get(&ipqent_pool, PR_NOWAIT);
+ tiqe = pool_get(&tcpqe_pool, PR_NOWAIT);
if (tiqe == NULL) {
- tcpstat.tcps_rcvmemdrop++;
- m_freem(m);
- return (0);
+ tiqe = LIST_FIRST(&tp->segq);
+ if (tiqe != NULL && th->th_seq == tp->rcv_nxt) {
+ /* Reuse last entry since new segment fills a hole */
+ while ((p = LIST_NEXT(tiqe, ipqe_q)) != NULL)
+ tiqe = p;
+ m_freem(tiqe->ipqe_m);
+ LIST_REMOVE(tiqe, ipqe_q);
+ }
+ if (tiqe == NULL || th->th_seq != tp->rcv_nxt) {
+ /* Flush fragments for this connection */
+ tcp_freeq(tp);
+ tcpstat.tcps_rcvmemdrop++;
+ m_freem(m);
+ return (0);
+ }
}
/*
@@ -235,7 +247,7 @@ tcp_reass(tp, th, m, tlen)
tcpstat.tcps_rcvduppack++;
tcpstat.tcps_rcvdupbyte += *tlen;
m_freem(m);
- pool_put(&ipqent_pool, tiqe);
+ pool_put(&tcpqe_pool, tiqe);
return (0);
}
m_adj(m, i);
@@ -265,7 +277,7 @@ tcp_reass(tp, th, m, tlen)
nq = q->ipqe_q.le_next;
m_freem(q->ipqe_m);
LIST_REMOVE(q, ipqe_q);
- pool_put(&ipqent_pool, q);
+ pool_put(&tcpqe_pool, q);
}
/* Insert the new fragment queue entry into place. */
@@ -301,7 +313,7 @@ present:
m_freem(q->ipqe_m);
else
sbappendstream(&so->so_rcv, q->ipqe_m);
- pool_put(&ipqent_pool, q);
+ pool_put(&tcpqe_pool, q);
q = nq;
} while (q != NULL && q->ipqe_tcp->th_seq == tp->rcv_nxt);
sorwakeup(so);
@@ -1388,8 +1400,10 @@ findpcb:
tp->snd_scale = tp->requested_s_scale;
tp->rcv_scale = tp->request_r_scale;
}
+ tcp_reass_lock(tp);
(void) tcp_reass(tp, (struct tcphdr *)0,
(struct mbuf *)0, &tlen);
+ tcp_reass_unlock(tp);
/*
* if we didn't have to retransmit the SYN,
* use its rtt as our initial srtt & rtt var.
@@ -1648,8 +1662,10 @@ trimthenstep6:
tp->snd_scale = tp->requested_s_scale;
tp->rcv_scale = tp->request_r_scale;
}
+ tcp_reass_lock(tp);
(void) tcp_reass(tp, (struct tcphdr *)0, (struct mbuf *)0,
&tlen);
+ tcp_reass_unlock(tp);
tp->snd_wl1 = th->th_seq - 1;
/* fall into ... */
@@ -2152,8 +2168,10 @@ dodata: /* XXX */
*/
if ((tlen || (tiflags & TH_FIN)) &&
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ tcp_reass_lock(tp);
if (th->th_seq == tp->rcv_nxt && tp->segq.lh_first == NULL &&
tp->t_state == TCPS_ESTABLISHED) {
+ tcp_reass_unlock(tp);
TCP_SETUP_ACK(tp, tiflags);
tp->rcv_nxt += tlen;
tiflags = th->th_flags & TH_FIN;
@@ -2170,6 +2188,7 @@ dodata: /* XXX */
} else {
m_adj(m, hdroptlen);
tiflags = tcp_reass(tp, th, m, &tlen);
+ tcp_reass_unlock(tp);
tp->t_flags |= TF_ACKNOW;
}
#ifdef TCP_SACK
Index: sys/netinet/tcp_subr.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.65
retrieving revision 1.65.4.2
diff -u -p -r1.65 -r1.65.4.2
--- sys/netinet/tcp_subr.c 28 Aug 2002 15:43:03 -0000 1.65
+++ sys/netinet/tcp_subr.c 3 Mar 2004 08:40:07 -0000 1.65.4.2
@@ -141,17 +141,18 @@ int tcp_do_ecn = 0; /* RFC3168 ECN enab
#endif
int tcbhashsize = TCBHASHSIZE;
+int tcp_reass_limit = NMBCLUSTERS / 2; /* hardlimit for tcpqe_pool */
+
#ifdef INET6
extern int ip6_defhlim;
#endif /* INET6 */
struct pool tcpcb_pool;
+struct pool tcpqe_pool;
#ifdef TCP_SACK
struct pool sackhl_pool;
#endif
-int tcp_freeq(struct tcpcb *);
-
struct tcpstat tcpstat; /* tcp statistics */
/*
@@ -165,6 +166,9 @@ tcp_init()
#endif /* TCP_COMPAT_42 */
pool_init(&tcpcb_pool, sizeof(struct tcpcb), 0, 0, 0, "tcpcbpl",
NULL);
+ pool_init(&tcpqe_pool, sizeof(struct ipqent), 0, 0, 0, "tcpqepl",
+ NULL);
+ pool_sethardlimit(&tcpqe_pool, tcp_reass_limit, NULL, 0);
#ifdef TCP_SACK
pool_init(&sackhl_pool, sizeof(struct sackhole), 0, 0, 0, "sackhlpl",
NULL);
@@ -670,7 +674,9 @@ tcp_close(struct tcpcb *tp)
#endif /* RTV_RTT */
/* free the reassembly queue, if any */
+ tcp_reass_lock(tp);
tcp_freeq(tp);
+ tcp_reass_unlock(tp);
tcp_canceltimers(tp);
TCP_CLEAR_DELACK(tp);
@@ -703,7 +709,7 @@ tcp_freeq(struct tcpcb *tp)
while ((qe = LIST_FIRST(&tp->segq)) != NULL) {
LIST_REMOVE(qe, ipqe_q);
m_freem(qe->ipqe_m);
- pool_put(&ipqent_pool, qe);
+ pool_put(&tcpqe_pool, qe);
rv = 1;
}
return (rv);
@@ -712,7 +718,20 @@ tcp_freeq(struct tcpcb *tp)
void
tcp_drain()
{
-
+ struct inpcb *inp;
+
+ /* called at splimp() */
+ CIRCLEQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue) {
+ struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
+
+ if (tp != NULL) {
+ if (tcp_reass_lock_try(tp) == 0)
+ continue;
+ if (tcp_freeq(tp))
+ tcpstat.tcps_conndrained++;
+ tcp_reass_unlock(tp);
+ }
+ }
}
/*
Index: sys/netinet/tcp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.68
retrieving revision 1.68.2.1
diff -u -p -r1.68 -r1.68.2.1
--- sys/netinet/tcp_usrreq.c 12 Feb 2003 14:41:08 -0000 1.68
+++ sys/netinet/tcp_usrreq.c 3 Mar 2004 08:40:07 -0000 1.68.2.1
@@ -877,6 +877,7 @@ tcp_sysctl(name, namelen, oldp, oldlenp,
void *newp;
size_t newlen;
{
+ int error, nval;
/* All sysctl names at this level are terminal. */
if (namelen != 1)
@@ -931,6 +932,18 @@ tcp_sysctl(name, namelen, oldp, oldlenp,
return (sysctl_int(oldp, oldlenp, newp, newlen,
&tcp_do_ecn));
#endif
+ case TCPCTL_REASS_LIMIT:
+ nval = tcp_reass_limit;
+ error = sysctl_int(oldp, oldlenp, newp, newlen, &nval);
+ if (error)
+ return (error);
+ if (nval != tcp_reass_limit) {
+ error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0);
+ if (error)
+ return (error);
+ tcp_reass_limit = nval;
+ }
+ return (0);
default:
return (ENOPROTOOPT);
}
Index: sys/netinet/tcp_var.h
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.45
retrieving revision 1.45.2.3
diff -u -p -r1.45 -r1.45.2.3
--- sys/netinet/tcp_var.h 12 Feb 2003 14:41:08 -0000 1.45
+++ sys/netinet/tcp_var.h 4 Mar 2004 03:35:15 -0000 1.45.2.3
@@ -86,6 +86,7 @@ struct tcpcb {
#define TF_SEND_CWR 0x00020000 /* send CWR in next seg */
#define TF_DISABLE_ECN 0x00040000 /* disable ECN for this connection */
#endif
+#define TF_REASSLOCK 0x00080000 /* reassembling or draining */
struct mbuf *t_template; /* skeletal packet for transmit */
struct inpcb *t_inpcb; /* back pointer to internet pcb */
@@ -209,6 +210,35 @@ do { \
timeout_del(&(tp)->t_delack_to); \
} \
} while (/*CONSTCOND*/0)
+
+static __inline int tcp_reass_lock_try(struct tcpcb *);
+static __inline void tcp_reass_unlock(struct tcpcb *);
+#define tcp_reass_lock(tp) tcp_reass_lock_try(tp)
+
+static __inline int
+tcp_reass_lock_try(struct tcpcb *tp)
+{
+ int s;
+
+ s = splimp();
+ if (tp->t_flags & TF_REASSLOCK) {
+ splx(s);
+ return (0);
+ }
+ tp->t_flags |= TF_REASSLOCK;
+ splx(s);
+ return (1);
+}
+
+static __inline void
+tcp_reass_unlock(struct tcpcb *tp)
+{
+ int s;
+
+ s = splimp();
+ tp->t_flags &= ~TF_REASSLOCK;
+ splx(s);
+}
#endif /* _KERNEL */
/*
@@ -323,6 +353,8 @@ struct tcpstat {
u_int32_t tcps_cwr_ecn; /* # of cwnd reduced by ecn */
u_int32_t tcps_cwr_frecovery; /* # of cwnd reduced by fastrecovery */
u_int32_t tcps_cwr_timeout; /* # of cwnd reduced by timeout */
+
+ u_int64_t tcps_conndrained; /* # of connections drained */
};
/*
@@ -343,7 +375,8 @@ struct tcpstat {
#define TCPCTL_RSTPPSLIMIT 12 /* RST pps limit */
#define TCPCTL_ACK_ON_PUSH 13 /* ACK immediately on PUSH */
#define TCPCTL_ECN 14 /* RFC3168 ECN */
-#define TCPCTL_MAXID 15
+#define TCPCTL_REASS_LIMIT 15 /* max entries for tcp reass queues */
+#define TCPCTL_MAXID 16
#define TCPCTL_NAMES { \
{ 0, 0 }, \
@@ -361,6 +394,7 @@ struct tcpstat {
{ "rstppslimit", CTLTYPE_INT }, \
{ "ackonpush", CTLTYPE_INT }, \
{ "ecn", CTLTYPE_INT }, \
+ { "reasslimit", CTLTYPE_INT }, \
}
struct tcp_ident_mapping {
@@ -381,10 +415,14 @@ extern struct pool sackhl_pool;
#endif
extern int tcp_do_ecn; /* RFC3168 ECN enabled/disabled? */
+extern struct pool tcpqe_pool;
+extern int tcp_reass_limit; /* max entries for tcp reass queues */
+
int tcp_attach(struct socket *);
void tcp_canceltimers(struct tcpcb *);
struct tcpcb *
tcp_close(struct tcpcb *);
+int tcp_freeq(struct tcpcb *);
#if defined(INET6) && !defined(TCP6)
void tcp6_ctlinput(int, struct sockaddr *, void *);
#endif