? sys/net/old
? sys/net/old2
? sys/netinet/tcpsign
Index: sys/net/route.c
===================================================================
RCS file: /cvsroot/src/sys/net/route.c,v
retrieving revision 1.98
diff -u -p -r1.98 route.c
--- sys/net/route.c 10 Oct 2007 22:14:38 -0000 1.98
+++ sys/net/route.c 12 Nov 2007 17:14:39 -0000
@@ -147,6 +147,7 @@ struct callout rt_timer_ch; /* callout f
static int _rtcache_debug = 0;
#endif /* RTFLUSH_DEBUG */
+struct rtentry *rtgethead(const struct sockaddr *, const struct sockaddr *);
static int rtdeletemsg(struct rtentry *);
static int rtflushclone1(struct rtentry *, void *);
static void rtflushclone(sa_family_t family, struct rtentry *);
@@ -304,6 +305,34 @@ rtalloc(struct route *ro)
rtcache(ro);
}
+/*
+ * Round-robin path selection.
+ * Advance the cursor kept in rt->rt_last to the entry rtnext() reports
+ * after it, and return that entry.  rt should be the first path.
+ */
+struct rtentry *
+rtchoosepath_rr(struct rtentry *rt)
+{
+	struct rtentry *pick = rtnext(rt->rt_last);
+
+	rt->rt_last = pick;
+	return pick;
+}
+
+/*
+ * Walk the circular path list starting just after rt and return the
+ * first entry flagged RTF_UP (rt itself is examined last).
+ * If no entry is up, return the rtentry that was passed in.
+ */
+struct rtentry *
+rtnext(struct rtentry *rt)
+{
+	struct rtentry *cand, *stop;
+
+	KASSERT(rt != NULL);
+	CLIST_FOREACH(cand, CLIST_NEXT(rt, rt_list), stop, rt_list) {
+		if ((cand->rt_flags & RTF_UP) != 0)
+			return cand;
+	}
+
+	return rt;
+}
+
struct rtentry *
rtalloc1(const struct sockaddr *dst, int report)
{
@@ -355,28 +384,81 @@ rtalloc1(const struct sockaddr *dst, int
return newrt;
}
+/*
+ * Thin wrapper around rnh_lookup: look up dst/netmask in the routing
+ * table of dst's address family, at splsoftnet, and return the entry
+ * found (the head of the list).  Returns NULL, and counts the miss in
+ * rtstat.rts_unreach, when the family has no table, nothing matches, or
+ * the match is an RNF_ROOT node.
+ */
+struct rtentry *
+rtgethead(const struct sockaddr *dst, const struct sockaddr *netmask)
+{
+	struct radix_node_head *rnh = rt_tables[dst->sa_family];
+	struct radix_node *rn = NULL;
+	struct rtentry *head = NULL;
+	int s;
+
+	s = splsoftnet();
+	if (rnh != NULL)
+		rn = rnh->rnh_lookup(dst, netmask, rnh);
+	if (rn != NULL && (rn->rn_flags & RNF_ROOT) == 0)
+		head = (struct rtentry *)rn;
+	else
+		rtstat.rts_unreach++;
+	splx(s);
+
+	return head;
+}
+
+/*
+ * Drop one reference on rt.  Once the count is no longer positive and the
+ * route is not RTF_UP, the entry is torn down: its timers are removed, its
+ * ifa reference is released, it is unlinked from the list of paths for its
+ * destination, and the rtentry goes back to rtentry_pool.
+ * Panics on a NULL rt; an entry whose count went negative is reported and
+ * deliberately not freed.
+ */
void
rtfree(struct rtentry *rt)
{
- struct ifaddr *ifa;
+ struct rtentry *rthead;
if (rt == NULL)
panic("rtfree");
rt->rt_refcnt--;
if (rt->rt_refcnt <= 0 && (rt->rt_flags & RTF_UP) == 0) {
- if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
- panic ("rtfree 2");
rttrash--;
if (rt->rt_refcnt < 0) {
printf("rtfree: %p not freed (neg refs)\n", rt);
return;
}
+ /* The path count is kept on the first entry of the list */
+ rthead = RTFIRST(rt);
+ rthead->rt_total--;
+ /* The radix-node sanity check only applies to the last remaining path */
+ if (rthead->rt_total == 0 &&
+ (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)))
+ panic("rtfree 2");
rt_timer_remove_all(rt, 0);
- ifa = rt->rt_ifa;
- rt->rt_ifa = NULL;
- IFAFREE(ifa);
- rt->rt_ifp = NULL;
- rt_destroy(rt);
+ IFAFREE(rt->rt_ifa);
+ if (rthead->rt_total == 0) {
+ /* No other paths remain for this destination */
+ rt_destroy(rt);
+ } else if (rthead == rt) {
+ /* Freeing the first path while others remain: the next path takes over */
+ struct radix_node_head *rnh;
+ struct rtentry *srt = CLIST_NEXT(rthead, rt_list),
+ *sen, *rtin;
+ KASSERT(rt != srt);
+ /* Hand the (already decremented) count over and reset the round-robin cursor */
+ srt->rt_total = rt->rt_total;
+ srt->rt_last = srt;
+ CLIST_REMOVE(rt, rt_list);
+ if ((rnh = rt_tables[rt_getkey(rt)->sa_family]) == NULL)
+ panic("rtfree: rt_tables");
+ /* Replace rt's radix nodes in the tree with srt's */
+ if (rnh->rnh_deladdr(rt_getkey(rt), rt_mask(rt), rnh) == NULL)
+ panic("rtfree: deladdr");
+ if (rnh->rnh_addaddr(rt_getkey(srt), rt_mask(srt), rnh,
+ srt->rt_nodes) == NULL)
+ panic("rtfree: addaddr");
+ /* Every remaining path must now name srt as its first entry */
+ CLIST_FOREACH(rtin, srt, sen, rt_list)
+ RTFIRST(rtin) = srt;
+ } else {
+ /* Delete a non-first path */
+ CLIST_REMOVE(rt, rt_list);
+ /* Do not leave the round-robin cursor on the entry being freed */
+ if (rthead->rt_last == rt)
+ rthead->rt_last = rthead;
+ }
+
+ if (rt->rt_gateway != NULL)
+ sockaddr_free(rt->rt_gateway);
+ /* XXX: possibly redundant; the author notes the entry is also zeroed at pool_get */
+ Bzero(rt, sizeof(*rt));
pool_put(&rtentry_pool, rt);
}
}
@@ -427,20 +509,33 @@ rtredirect(const struct sockaddr *dst, c
error = ENETUNREACH;
goto out;
}
- rt = rtalloc1(dst, 0);
/*
- * If the redirect isn't from our current router for this dst,
- * it's either old or wrong. If it redirects us to ourselves,
- * we have a routing loop, perhaps as a result of an interface
- * going down recently.
+ * If it redirects us to ourselves we have a routing loop,
+ * perhaps as a result of an interface going down recently.
*/
- if (!(flags & RTF_DONE) && rt &&
- (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
- error = EINVAL;
- else if (ifa_ifwithaddr(gateway))
+ if (ifa_ifwithaddr(gateway)) {
error = EHOSTUNREACH;
- if (error)
- goto done;
+ goto out;
+ }
+ rt = rtalloc1(dst, 0);
+ if (rt && !(flags & RTF_DONE)) {
+ /*
+ * If the redirect isn't from our current router for this dst,
+ * it's either old or wrong. Also calibrate rt.
+ */
+ struct rtentry *sentinel, *nrt;
+ CLIST_FOREACH(nrt, rt, sentinel, rt_list)
+ if(equal(src, nrt->rt_gateway) && (nrt->rt_ifa == ifa))
+ break;
+ if(nrt == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+ rt->rt_refcnt--;
+ nrt->rt_refcnt++;
+ rt = nrt;
+ }
+
/*
* Create a new entry if we just got back a wildcard entry
* or the lookup failed. This is necessary for hosts
@@ -485,6 +580,7 @@ rtredirect(const struct sockaddr *dst, c
}
} else
error = EHOSTUNREACH;
+
done:
if (rt) {
if (rtp != NULL && !error)
@@ -674,7 +770,7 @@ rtrequest1(int req, struct rt_addrinfo *
{
int s = splsoftnet();
int error = 0;
- struct rtentry *rt, *crt;
+ struct rtentry *rt, *crt = NULL, *sentinel, *nrt;
struct radix_node *rn;
struct radix_node_head *rnh;
struct ifaddr *ifa;
@@ -698,16 +794,45 @@ rtrequest1(int req, struct rt_addrinfo *
}
if ((rn = rnh->rnh_lookup(dst, netmask, rnh)) == NULL)
senderr(ESRCH);
- rt = (struct rtentry *)rn;
+ crt = rt = (struct rtentry *)rn;
+ /* Calibrate */
+ if (gateway != NULL && !(crt->rt_flags & RTF_CLONING)) {
+ /*
+ * XXX: we can have a gateway on cloning route
+ */
+ CLIST_FOREACH(rt, crt, sentinel, rt_list)
+ if (sockaddr_cmp(gateway, rt->rt_gateway) == 0)
+ break;
+ if (rt == NULL)
+ senderr(ESRCH);
+ } else
+ if (! CLIST_SINGULAR(crt, rt_list)) {
+ /*
+ * If gateway is not provided when
+ * multiple paths exist check if it's a cloning
+ * route and try to match ifp
+ */
+ if ( (crt->rt_flags & RTF_CLONING) == 0 ||
+ !(info->rti_ifa))
+ senderr(EINVAL);
+ CLIST_FOREACH(rt, crt, sentinel, rt_list)
+ if (rt->rt_ifp == info->rti_ifa->ifa_ifp)
+ break;
+ if (rt == NULL)
+ senderr(EINVAL);
+ }
+ if (CLIST_SINGULAR(rt, rt_list)) {
+ if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == NULL)
+ senderr(ESRCH);
+ if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
+ panic("rtrequest delete");
+ }
if ((rt->rt_flags & RTF_CLONING) != 0) {
/* clean up any cloned children */
rtflushclone(dst->sa_family, rt);
}
- if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == NULL)
- senderr(ESRCH);
- if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
- panic ("rtrequest delete");
- rt = (struct rtentry *)rn;
+ if (rt->rt_nodes->rn_flags & RNF_ROOT)
+ panic("rtrequest delete 2");
if (rt->rt_gwroute) {
RTFREE(rt->rt_gwroute);
rt->rt_gwroute = NULL;
@@ -781,26 +906,60 @@ rtrequest1(int req, struct rt_addrinfo *
rt->rt_parent = *ret_nrt;
rt->rt_parent->rt_refcnt++;
}
+ rt->rt_total = 1;
+ rt->rt_first = rt;
+ rt->rt_last = rt;
+ CLIST_INIT(rt, rt_list);
RT_DPRINTF("%s l.%d: rt->_rt_key = %p\n", __func__,
__LINE__, (void *)rt->_rt_key);
rn = rnh->rnh_addaddr(rt_getkey(rt), netmask, rnh,
rt->rt_nodes);
RT_DPRINTF("%s l.%d: rt->_rt_key = %p\n", __func__,
__LINE__, (void *)rt->_rt_key);
- if (rn == NULL && (crt = rtalloc1(rt_getkey(rt), 0)) != NULL) {
+ if (rn == NULL &&
+ ((crt = rtgethead(rt_getkey(rt), NULL)) != NULL) &&
/* overwrite cloned route */
- if ((crt->rt_flags & RTF_CLONED) != 0) {
- rtdeletemsg(crt);
- rn = rnh->rnh_addaddr(rt_getkey(rt),
- netmask, rnh, rt->rt_nodes);
+ ((crt->rt_flags & RTF_CLONED) != 0)) {
+ rtdeletemsg(crt);
+ rn = rnh->rnh_addaddr(rt_getkey(rt),
+ netmask, rnh, rt->rt_nodes);
+ crt = NULL;
+ if (rn == NULL) {
+ error = ENOMEM;
+ goto eexist;
}
- RTFREE(crt);
RT_DPRINTF("%s l.%d: rt->_rt_key = %p\n", __func__,
- __LINE__, (void *)rt->_rt_key);
+ __LINE__, (void *)rt->_rt_key);
}
- RT_DPRINTF("%s l.%d: rt->_rt_key = %p\n", __func__,
- __LINE__, (void *)rt->_rt_key);
- if (rn == NULL) {
+ else if (req == RTM_ADD && rn == NULL &&
+ ((crt = rtgethead(rt_getkey(rt), netmask)) != NULL)) {
+ /* New route for the same destination */
+ if (crt->rt_total >= MAX_PATHS) {
+ error = E2BIG;
+ goto eexist;
+ }
+ if (gateway) {
+ CLIST_FOREACH(nrt, crt, sentinel, rt_list)
+ if (sockaddr_cmp(nrt->rt_gateway, gateway) == 0)
+ goto eexist;
+ } else if((rt->rt_flags & RTF_CLONING) &&
+ (info->rti_ifa)) {
+ CLIST_FOREACH(nrt, crt, sentinel, rt_list)
+ if (nrt->rt_ifp == info->rti_ifa->ifa_ifp)
+ goto eexist;
+ } else if(rt->rt_flags & RTF_CLONING)
+ CLIST_FOREACH(nrt, crt, sentinel, rt_list)
+ if (nrt->rt_ifp == rt->rt_ifp)
+ goto eexist;
+ sockaddr_free(rt->_rt_key);
+ rt->rt_nodes->rn_mask = crt->rt_nodes->rn_mask;
+ rt->_rt_key = crt->_rt_key;
+ rt->rt_first = crt;
+ CLIST_INSERT_AFTER(crt, rt, rt_list);
+ crt->rt_total++;
+ crt = NULL;
+ } else if (rn == NULL) {
+eexist:
IFAFREE(ifa);
if ((rt->rt_flags & RTF_CLONED) != 0 && rt->rt_parent)
rtfree(rt->rt_parent);
@@ -808,7 +967,10 @@ rtrequest1(int req, struct rt_addrinfo *
rtfree(rt->rt_gwroute);
rt_destroy(rt);
pool_put(&rtentry_pool, rt);
- senderr(EEXIST);
+ if (error)
+ senderr(error)
+ else
+ senderr(EEXIST);
}
RT_DPRINTF("%s l.%d: rt->_rt_key = %p\n", __func__,
__LINE__, (void *)rt->_rt_key);
@@ -824,7 +986,8 @@ rtrequest1(int req, struct rt_addrinfo *
/* clean up any cloned children */
rtflushclone(dst->sa_family, rt);
}
- rtflushall(dst->sa_family);
+ if (crt == NULL)
+ rtflushall(dst->sa_family);
break;
case RTM_GET:
if (netmask != NULL) {
@@ -837,6 +1000,7 @@ rtrequest1(int req, struct rt_addrinfo *
senderr(ESRCH);
if (ret_nrt != NULL) {
rt = (struct rtentry *)rn;
+ rt = rtchoosepath_rr(rt);
*ret_nrt = rt;
rt->rt_refcnt++;
}
@@ -944,8 +1108,12 @@ rtinit(struct ifaddr *ifa, int cmd, int
rt_maskedcopy(odst, dst, ifa->ifa_netmask);
}
if ((rt = rtalloc1(dst, 0)) != NULL) {
+ struct rtentry *sentinel;
rt->rt_refcnt--;
- if (rt->rt_ifa != ifa)
+ CLIST_FOREACH(rt, rt, sentinel, rt_list)
+ if (rt->rt_ifa->ifa_ifp == ifa->ifa_ifp)
+ break;
+ if (rt == NULL)
return (flags & RTF_HOST) ? EHOSTUNREACH
: ENETUNREACH;
}
Index: sys/net/route.h
===================================================================
RCS file: /cvsroot/src/sys/net/route.h,v
retrieving revision 1.58
diff -u -p -r1.58 route.h
--- sys/net/route.h 27 Aug 2007 00:34:01 -0000 1.58
+++ sys/net/route.h 12 Nov 2007 17:14:39 -0000
@@ -93,6 +93,10 @@ struct rt_metrics {
#ifndef RNF_NORMAL
#include <net/radix.h>
#endif
+
+/* XXX: sysctl maybe ? */
+#define MAX_PATHS 64
+
struct rtentry {
struct radix_node rt_nodes[2]; /* tree glue, and other values */
#define rt_mask(r) ((const struct sockaddr *)((r)->rt_nodes->rn_mask))
@@ -108,7 +112,13 @@ struct rtentry {
struct rtentry *rt_gwroute; /* implied entry for gatewayed routes */
LIST_HEAD(, rttimer) rt_timer; /* queue of timeouts for misc funcs */
struct rtentry *rt_parent; /* parent of cloned route */
- struct sockaddr *_rt_key;
+ struct sockaddr *_rt_key;
+ /* load-sharing */
+ CLIST_ENTRY(rtentry) rt_list;
+ struct rtentry *rt_first; /* First entry in list */
+#define RTFIRST(r) ((r)->rt_first)
+ struct rtentry *rt_last; /* For round robin */
+ uint8_t rt_total; /* Number of paths */
};
static inline const struct sockaddr *
@@ -366,6 +376,7 @@ out:
}
struct rtentry *rtfindparent(struct radix_node_head *, struct route *);
+struct rtentry *rtnext(struct rtentry *);
#ifdef RTCACHE_DEBUG
#define rtcache_init(ro) rtcache_init_debug(__func__, ro)
@@ -386,6 +397,7 @@ void rtcache_clear(struct route *);
void rtcache_update(struct route *, int);
void rtcache_free(struct route *);
int rtcache_setdst(struct route *, const struct sockaddr *);
+struct rtentry* rtchoosepath_rr(struct rtentry *);
static inline struct rtentry *
rtcache_lookup1(struct route *ro, const struct sockaddr *dst, int clone)
Index: sys/net/rtsock.c
===================================================================
RCS file: /cvsroot/src/sys/net/rtsock.c,v
retrieving revision 1.95
diff -u -p -r1.95 rtsock.c
--- sys/net/rtsock.c 19 Jul 2007 20:48:53 -0000 1.95
+++ sys/net/rtsock.c 12 Nov 2007 17:14:39 -0000
@@ -306,7 +306,7 @@ route_output(struct mbuf *m, ...)
if (rtm->rtm_type != RTM_GET) {/* XXX: too grotty */
struct radix_node *rn;
- if (memcmp(dst, rt_getkey(rt), dst->sa_len) != 0)
+ if (sockaddr_cmp(dst, rt_getkey(rt)) != 0)
senderr(ESRCH);
netmask = intern_netmask(netmask);
for (rn = rt->rt_nodes; rn; rn = rn->rn_dupedkey)
@@ -923,6 +923,8 @@ sysctl_dumpentry(struct rtentry *rt, voi
int error = 0, size;
struct rt_addrinfo info;
+ if (CLIST_NEXT(rt, rt_list) != RTFIRST(rt))
+ sysctl_dumpentry(CLIST_NEXT(rt, rt_list), v);
if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
return 0;
memset(&info, 0, sizeof(info));
Index: sys/netinet/if_arp.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/if_arp.c,v
retrieving revision 1.128
diff -u -p -r1.128 if_arp.c
--- sys/netinet/if_arp.c 2 Sep 2007 19:42:22 -0000 1.128
+++ sys/netinet/if_arp.c 12 Nov 2007 17:14:39 -0000
@@ -768,16 +768,18 @@ arpresolve(struct ifnet *ifp, struct rte
if (rt->rt_expire) {
rt->rt_flags &= ~RTF_REJECT;
if (la->la_asked == 0 || rt->rt_expire != time_second) {
+ struct rtentry *nrt, *sentinel;
rt->rt_expire = time_second;
if (la->la_asked++ < arp_maxtries)
- arprequest(ifp,
- &satocsin(rt->rt_ifa->ifa_addr)->sin_addr,
+ CLIST_FOREACH(nrt, rt->rt_parent, sentinel, rt_list)
+ arprequest(nrt->rt_ifp,
+ &satocsin(nrt->rt_ifa->ifa_addr)->sin_addr,
&satocsin(dst)->sin_addr,
#if NCARP > 0
(rt->rt_ifp->if_type == IFT_CARP) ?
CLLADDR(rt->rt_ifp->if_sadl):
#endif
- CLLADDR(ifp->if_sadl));
+ CLLADDR(nrt->rt_ifp->if_sadl));
else {
rt->rt_flags |= RTF_REJECT;
rt->rt_expire += arpt_down;
@@ -1097,6 +1099,19 @@ in_arpinput(struct mbuf *m)
if (rt->rt_expire)
rt->rt_expire = time_second + arpt_keep;
rt->rt_flags &= ~RTF_REJECT;
+ if(rt->rt_ifp != ifp) {
+ /*
+ * Reply came on different interface. Check
+ * if we have a rt_parent with this ifp
+ */
+ struct rtentry *nrt, *sentinel;
+ CLIST_FOREACH(nrt, rt->rt_parent, sentinel, rt_list)
+ if(nrt->rt_ifp == ifp) {
+ rt_replace_ifa(rt, nrt->rt_ifa);
+ rt->rt_ifp = nrt->rt_ifp;
+ break;
+ }
+ }
la->la_asked = 0;
s = splnet();
Index: sys/netinet/in.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/in.c,v
retrieving revision 1.118
diff -u -p -r1.118 in.c
--- sys/netinet/in.c 1 Sep 2007 04:32:51 -0000 1.118
+++ sys/netinet/in.c 12 Nov 2007 17:14:39 -0000
@@ -987,7 +987,7 @@ bad:
/*
* add a route to prefix ("connected route" in cisco terminology).
- * does nothing if there's some interface address with the same prefix already.
+ * does nothing if the same prefix is already assigned to the same interface.
*/
static int
in_addprefix(struct in_ifaddr *target, int flags)
@@ -1012,14 +1012,11 @@ in_addprefix(struct in_ifaddr *target, i
p.s_addr &= ia->ia_sockmask.sin_addr.s_addr;
}
- if (prefix.s_addr != p.s_addr)
+ if (prefix.s_addr != p.s_addr || target->ia_ifp != ia->ia_ifp)
continue;
-
/*
- * if we got a matching prefix route inserted by other
- * interface address, we don't need to bother
- *
- * XXX RADIX_MPATH implications here? -dyoung
+ * if we got a matching prefix route inserted on the same
+ * interface, we don't need to bother
*/
if (ia->ia_flags & IFA_ROUTE)
return 0;
Index: sys/netinet/in.h
===================================================================
RCS file: /cvsroot/src/sys/netinet/in.h,v
retrieving revision 1.81
diff -u -p -r1.81 in.h
--- sys/netinet/in.h 19 Sep 2007 04:33:43 -0000 1.81
+++ sys/netinet/in.h 12 Nov 2007 17:14:39 -0000
@@ -450,8 +450,9 @@ struct ip_mreq {
#define IPCTL_IFQ 21 /* ipintrq node */
#define IPCTL_RANDOMID 22 /* use random IP ids (if configured) */
#define IPCTL_LOOPBACKCKSUM 23 /* do IP checksum on loopback */
-#define IPCTL_STATS 24 /* IP statistics */
-#define IPCTL_MAXID 25
+#define IPCTL_STATS 24 /* IP statistics */
+#define IPCTL_LOAD_SHARING 25 /* Load sharing */
+#define IPCTL_MAXID 26
#define IPCTL_NAMES { \
{ 0, 0 }, \
@@ -479,7 +480,13 @@ struct ip_mreq {
{ "random_id", CTLTYPE_INT }, \
{ "do_loopback_cksum", CTLTYPE_INT }, \
{ "stats", CTLTYPE_STRUCT }, \
+ { "load-sharing", CTLTYPE_NODE }, \
}
+
+/* Load sharing */
+#define IPCTL_LS_SELECTED 1
+#define IPCTL_LS_AVAILABLE 2
+
#endif /* _NETBSD_SOURCE */
/* INET6 stuff */
Index: sys/netinet/ip_input.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/ip_input.c,v
retrieving revision 1.254
diff -u -p -r1.254 ip_input.c
--- sys/netinet/ip_input.c 2 Oct 2007 20:35:04 -0000 1.254
+++ sys/netinet/ip_input.c 12 Nov 2007 17:14:39 -0000
@@ -218,6 +218,13 @@ int ip_do_randomid = 0;
*/
int ip_checkinterface = 0;
+#define INITIAL_LS 2 /* default index into load_sharing_strings[] */
+#define MAX_LS_STRING 20 /* size of the buffer that holds one algorithm name */
+
+/*
+ * NULL-terminated table of load-sharing algorithm names;
+ * load_sharing_index holds the index of the one currently selected.
+ */
+/* See also defines in ip_output.c if you want to change these */
+const char* load_sharing_strings[] = { "first-only", "round-robin",
+ "simple-sum", NULL };
+int load_sharing_index = INITIAL_LS;
struct rttimer_queue *ip_mtudisc_timeout_q = NULL;
@@ -2163,6 +2170,45 @@ sysctl_net_inet_ip_hashsize(SYSCTLFN_ARG
}
#endif /* GATEWAY */
+/*
+ * sysctl handler for the load-sharing "selected" node.
+ * Exports the name of the current algorithm; when a new value is
+ * supplied (newp != NULL) it must match an entry of load_sharing_strings,
+ * whose index then becomes load_sharing_index.  Unknown names yield EINVAL.
+ */
+static int
+sysctl_load_sharing(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node = *rnode;
+	char name[MAX_LS_STRING];
+	int error, idx;
+
+	/* Work on a private copy so a rejected write cannot clobber the table */
+	strlcpy(name, load_sharing_strings[load_sharing_index], MAX_LS_STRING);
+	node.sysctl_data = name;
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+	if (error || newp == NULL)
+		return error;
+
+	for (idx = 0; load_sharing_strings[idx] != NULL; idx++) {
+		if (strncmp(load_sharing_strings[idx], name,
+		    MAX_LS_STRING) == 0) {
+			load_sharing_index = idx;
+			return 0;
+		}
+	}
+
+	return EINVAL;
+}
+
+/*
+ * sysctl handler for the load-sharing "available" node: hands
+ * sysctl_lookup a single string made of every entry of
+ * load_sharing_strings, separated by spaces.
+ */
+static int
+sysctl_ls_types(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node = *rnode;
+	char names[255];
+	int idx;
+
+	names[0] = '\0';
+	/* XXX: slow and ugly */
+	for (idx = 0; load_sharing_strings[idx] != NULL; idx++) {
+		if (idx > 0)
+			strlcat(names, " ", sizeof(names));
+		strlcat(names, load_sharing_strings[idx], sizeof(names));
+	}
+	node.sysctl_data = names;
+	return sysctl_lookup(SYSCTLFN_CALL(&node));
+}
SYSCTL_SETUP(sysctl_net_inet_ip_setup, "sysctl net.inet.ip subtree setup")
{
@@ -2370,4 +2416,24 @@ SYSCTL_SETUP(sysctl_net_inet_ip_setup, "
NULL, 0, &ipstat, sizeof(ipstat),
CTL_NET, PF_INET, IPPROTO_IP, IPCTL_STATS,
CTL_EOL);
+ sysctl_createv(clog, 0, NULL, NULL,
+ CTLFLAG_PERMANENT, CTLTYPE_NODE, "load-sharing",
+ SYSCTL_DESCR("IP load sharing"),
+ NULL, 0, NULL, 0, CTL_NET, PF_INET, IPPROTO_IP,
+ IPCTL_LOAD_SHARING, CTL_EOL);
+ sysctl_createv(clog, 0, NULL, NULL,
+ CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
+ CTLTYPE_STRING, "selected",
+ SYSCTL_DESCR("IP load sharing algorithm"),
+ sysctl_load_sharing, 0,
+ &load_sharing_strings[INITIAL_LS],
+ MAX_LS_STRING - 1,
+ CTL_NET, PF_INET, IPPROTO_IP,
+ IPCTL_LOAD_SHARING, IPCTL_LS_SELECTED, CTL_EOL);
+ sysctl_createv(clog, 0, NULL, NULL,
+ CTLFLAG_PERMANENT, CTLTYPE_STRING, "available",
+ SYSCTL_DESCR("IP load sharing supported algorithms"),
+ sysctl_ls_types, 0, NULL, 255, CTL_NET,
+ PF_INET, IPPROTO_IP, IPCTL_LOAD_SHARING, IPCTL_LS_AVAILABLE,
+ CTL_EOL);
}
Index: sys/netinet/ip_output.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/ip_output.c,v
retrieving revision 1.184
diff -u -p -r1.184 ip_output.c
--- sys/netinet/ip_output.c 19 Sep 2007 04:33:43 -0000 1.184
+++ sys/netinet/ip_output.c 12 Nov 2007 17:14:39 -0000
@@ -171,6 +171,16 @@ int ip_do_loopback_cksum = 0;
(((csum_flags) & M_CSUM_TCPv4) != 0 && tcp_do_loopback_cksum) || \
(((csum_flags) & M_CSUM_IPv4) != 0 && ip_do_loopback_cksum)))
+/* See also string associations in ip_input.c if you want to change these */
+#define LS_NONE 0
+#define LS_RR 1
+#define LS_SS 2
+
+extern int load_sharing_index;
+
+/*
+ * Sum of the four octets of a 32-bit IPv4 address.
+ * The argument is parenthesized and each octet is isolated by shift and
+ * mask, so the result does not depend on the operand being exactly 32
+ * bits wide or on the precedence of operators inside the argument.
+ * Note that the argument is evaluated four times.
+ */
+#define tiny_sum(ip4a)	((((ip4a) >> 24) & 0xff) + (((ip4a) >> 16) & 0xff) + \
+			 (((ip4a) >> 8) & 0xff) + ((ip4a) & 0xff))
+
+
/*
* IP output. The packet in mbuf chain m contains a skeletal IP
* header (with len, off, ttl, proto, tos, src, dst).
@@ -338,13 +348,43 @@ ip_output(struct mbuf *m0, ...)
mtu = ifp->if_mtu;
IFP_TO_IA(ifp, ia);
} else {
- if (ro->ro_rt == NULL)
+ int ro_cached = 1;
+ if (ro->ro_rt == NULL) {
rtcache_init(ro);
+ ro_cached = 0;
+ }
if (ro->ro_rt == NULL) {
ipstat.ips_noroute++;
error = EHOSTUNREACH;
goto bad;
}
+ /* Load-sharing */
+ if (ro->ro_rt->rt_total > 1 &&
+ load_sharing_index != LS_NONE &&
+ !(load_sharing_index == LS_SS && ro_cached)) {
+ ro->ro_rt->rt_refcnt--;
+ switch(load_sharing_index) {
+ case LS_RR:
+ ro->ro_rt = rtchoosepath_rr(ro->ro_rt);
+ break;
+ case LS_SS:
+ {
+ uint8_t i, hsh;
+ /* I'm not that happy with this "sum" */
+ hsh = ( tiny_sum(ip->ip_src.s_addr) +
+ tiny_sum(ip->ip_dst.s_addr) +
+ ip->ip_p + ip->ip_tos) %
+ ro->ro_rt->rt_total;
+ /* XXX: Normally it should be up... */
+ if (hsh == 0 && !(ro->ro_rt->rt_flags & RTF_UP))
+ ro->ro_rt = rtnext(ro->ro_rt);
+ else for (i = 0; i < hsh; i++)
+ ro->ro_rt = rtnext(ro->ro_rt);
+ }
+ break;
+ }
+ ro->ro_rt->rt_refcnt++;
+ }
ia = ifatoia(ro->ro_rt->rt_ifa);
ifp = ro->ro_rt->rt_ifp;
if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0)
Index: sys/sys/queue.h
===================================================================
RCS file: /cvsroot/src/sys/sys/queue.h,v
retrieving revision 1.47
diff -u -p -r1.47 queue.h
--- sys/sys/queue.h 18 Jul 2007 12:07:35 -0000 1.47
+++ sys/sys/queue.h 12 Nov 2007 17:14:39 -0000
@@ -674,4 +674,57 @@ struct { \
? ((head)->cqh_last) \
: (elm->field.cqe_prev))
+/*
+ * Circular lists definitions
+ *
+ * A circular list has no separate head structure: every element embeds a
+ * CLIST_ENTRY, and a lone element links to itself (see CLIST_INIT).
+ */
+#define	CLIST_ENTRY(__type)						\
+	struct {							\
+		struct __type *cl_next;					\
+		struct __type *cl_prev;					\
+	}
+
+/*
+ * Circular lists functions
+ */
+
+/*
+ * Visit each element once, starting at __first and following the link
+ * member __field.  __sentinel is scratch storage that remembers where the
+ * walk began.  __elm is NULL after a complete traversal, and the body is
+ * never entered when __first is NULL.
+ * The successor is read from __elm after the body has run, so the body
+ * must not unlink __elm (CLIST_REMOVE makes it point back at itself).
+ */
+#define	CLIST_FOREACH1(__elm, __first, __sentinel, __field)		\
+	for ((__elm) = (__sentinel) = (__first); (__elm) != NULL;	\
+	    (__elm) = ((__elm)->__field == (__sentinel))		\
+	    ? NULL							\
+	    : (__elm)->__field)
+
+/* Forward traversal (follows cl_next). */
+#define	CLIST_FOREACH(__elm, __first, __sentinel, __field)		\
+	CLIST_FOREACH1((__elm), (__first), __sentinel, __field.cl_next)
+
+/* Backward traversal (follows cl_prev). */
+#define	CLIST_FOREACH_REVERSE(__elm, __first, __sentinel, __field)	\
+	CLIST_FOREACH1((__elm), (__first), __sentinel, __field.cl_prev)
+
+/* Make __elm a one-element list: both links point at __elm itself. */
+#define	CLIST_INIT(__elm, __field)					\
+	do {								\
+		(__elm)->__field.cl_prev = (__elm)->__field.cl_next =	\
+		    (__elm);						\
+	} while (/*CONSTCOND*/0)
+
+/* True when __elm is the only element of its list. */
+#define	CLIST_SINGULAR(__elm, __field) ((__elm)->__field.cl_prev == (__elm))
+
+/* Unlink __elm from its neighbours and leave it as a one-element list. */
+#define	CLIST_REMOVE(__elm, __field)					\
+	do {								\
+		(__elm)->__field.cl_prev->__field.cl_next =		\
+		    (__elm)->__field.cl_next;				\
+		(__elm)->__field.cl_next->__field.cl_prev =		\
+		    (__elm)->__field.cl_prev;				\
+		CLIST_INIT((__elm), __field);				\
+	} while (/*CONSTCOND*/0)
+
+/*
+ * Link __elm into the list immediately after __listelm.
+ * The two must be different elements; the macro arguments are
+ * parenthesized inside the assertion so that expression arguments
+ * compare as intended.
+ */
+#define	CLIST_INSERT_AFTER(__listelm, __elm, __field)			\
+	do {								\
+		assert((__listelm) != (__elm));				\
+		(__elm)->__field.cl_prev = (__listelm);			\
+		(__elm)->__field.cl_next = (__listelm)->__field.cl_next;\
+		(__listelm)->__field.cl_next = (__elm);			\
+		(__elm)->__field.cl_next->__field.cl_prev = (__elm);	\
+	} while (/*CONSTCOND*/0)
+
+/* Neighbour accessors. */
+#define	CLIST_NEXT(__elm, __field) ((__elm)->__field.cl_next)
+#define	CLIST_PREV(__elm, __field) ((__elm)->__field.cl_prev)
+
+
#endif /* !_SYS_QUEUE_H_ */
Index: usr.bin/netstat/route.c
===================================================================
RCS file: /cvsroot/src/usr.bin/netstat/route.c,v
retrieving revision 1.69
diff -u -p -r1.69 route.c
--- usr.bin/netstat/route.c 19 Jul 2007 20:51:04 -0000 1.69
+++ usr.bin/netstat/route.c 12 Nov 2007 17:14:39 -0000
@@ -171,6 +171,11 @@ again:
} else if (do_rtent) {
kget(rn, rtentry);
p_krtentry(&rtentry);
+ while ( CLIST_NEXT(&rtentry, rt_list) !=
+ (struct rtentry*)rn ) {
+ kget(CLIST_NEXT(&rtentry, rt_list), rtentry);
+ p_krtentry(&rtentry);
+ }
if (Aflag)
p_rtnode();
} else {