/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
/**
* cxgb_register_client - register an offload client
* @client: the client
*
 * Add the client to the client list and call back the client for each
 * activated offload device.
*/
void
cxgb_register_client(struct cxgb_client *client)
{
	struct toedev *tdev;

	mtx_lock(&cxgb_db_lock);
	TAILQ_INSERT_TAIL(&client_list, client, client_entry);

if (client->add) {
TAILQ_FOREACH(tdev, &ofld_dev_list, ofld_entry) {
if (offload_activated(tdev))
client->add(tdev);
}
}
mtx_unlock(&cxgb_db_lock);
}
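
/*
 * Example usage (sketch): an offload client such as an iSCSI or iWARP
 * module would typically register itself once at load time.  The member
 * names below assume the usual cxgb_client layout (name/add/remove), and
 * my_add/my_remove are hypothetical callbacks, shown only to illustrate
 * what cxgb_register_client() invokes for every activated toedev:
 *
 *	static struct cxgb_client my_client = {
 *		.name   = "my_toe_client",
 *		.add    = my_add,
 *		.remove = my_remove,
 *	};
 *
 *	cxgb_register_client(&my_client);
 */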
/**
* cxgb_unregister_client - unregister an offload client
* @client: the client
*
 * Remove the client from the client list and call back the client for each
 * activated offload device.
*/
void
cxgb_unregister_client(struct cxgb_client *client)
{
	struct toedev *tdev;

	mtx_lock(&cxgb_db_lock);
	TAILQ_REMOVE(&client_list, client, client_entry);
	if (client->remove) {
		TAILQ_FOREACH(tdev, &ofld_dev_list, ofld_entry) {
			if (offload_activated(tdev))
				client->remove(tdev);
		}
	}
	mtx_unlock(&cxgb_db_lock);
}

/* Find the ifnet whose port MAC (and, eventually, VLAN) matches, or NULL. */
static struct ifnet *
get_iff_from_mac(adapter_t *adapter, const uint8_t *mac, unsigned int vlan)
{
	int i;

	for_each_port(adapter, i) {
		const struct port_info *p = &adapter->port[i];

		if (!memcmp(p->hw_addr, mac, ETHER_ADDR_LEN)) {
#ifdef notyet
			/* VLAN and bonding lookups, not yet ported */
			if (vlan && vlan != EVL_VLID_MASK) {
				grp = p->vlan_grp;
				dev = grp ? grp->vlan_devices[vlan] : NULL;
			} else
				while (dev->master)
					dev = dev->master;
			return dev;
#else
			return p->ifp;
#endif
		}
	}
	return NULL;
}
static inline void
failover_fixup(adapter_t *adapter, int port)
{
if (adapter->params.rev == 0) {
struct ifnet *ifp = adapter->port[port].ifp;
struct cmac *mac = &adapter->port[port].mac;
if (!(ifp->if_flags & IFF_UP)) {
/* Failover triggered by the interface ifdown */
t3_write_reg(adapter, A_XGM_TX_CTRL + mac->offset,
F_TXEN);
t3_read_reg(adapter, A_XGM_TX_CTRL + mac->offset);
} else {
/* Failover triggered by the interface link down */
t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset, 0);
t3_read_reg(adapter, A_XGM_RX_CTRL + mac->offset);
t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset,
F_RXEN);
}
}
}
static int
cxgb_ulp_iscsi_ctl(adapter_t *adapter, unsigned int req, void *data)
{
int ret = 0;
struct ulp_iscsi_info *uiip = data;
switch (req) {
case ULP_ISCSI_GET_PARAMS:
uiip->llimit = t3_read_reg(adapter, A_ULPRX_ISCSI_LLIMIT);
uiip->ulimit = t3_read_reg(adapter, A_ULPRX_ISCSI_ULIMIT);
uiip->tagmask = t3_read_reg(adapter, A_ULPRX_ISCSI_TAGMASK);
/*
* On tx, the iscsi pdu has to be <= tx page size and has to
* fit into the Tx PM FIFO.
*/
uiip->max_txsz = uimin(adapter->params.tp.tx_pg_size,
t3_read_reg(adapter, A_PM1_TX_CFG) >> 17);
		/*
		 * On rx, the iscsi pdu has to be < rx page size and the
		 * whole pdu + cpl headers have to fit into one sge buffer.
		 */
uiip->max_rxsz =
(unsigned int)uimin(adapter->params.tp.rx_pg_size,
(adapter->sge.qs[0].fl[1].buf_size -
sizeof(struct cpl_rx_data) * 2 -
sizeof(struct cpl_rx_data_ddp)) );
break;
case ULP_ISCSI_SET_PARAMS:
t3_write_reg(adapter, A_ULPRX_ISCSI_TAGMASK, uiip->tagmask);
break;
default:
ret = (EOPNOTSUPP);
}
return ret;
}
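
/*
 * Example usage (sketch): an iSCSI ULP would normally reach this code
 * through the toedev's control hook rather than calling it directly.
 * The ->ctl method name is assumed here:
 *
 *	struct ulp_iscsi_info uiip;
 *
 *	if (tdev->ctl(tdev, ULP_ISCSI_GET_PARAMS, &uiip) == 0)
 *		max_tx_pdu = uiip.max_txsz;	(bounded by the tx page size
 *						 and the Tx PM FIFO)
 */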
/* Response queue used for RDMA events. */
#define ASYNC_NOTIF_RSPQ 0
static int
cxgb_rdma_ctl(adapter_t *adapter, unsigned int req, void *data)
{
	int ret = 0;

	switch (req) {
	case RDMA_CQ_OP: {
		struct rdma_cq_op *req2 = data;

		/* may be called in any context */
		mtx_lock(&adapter->sge.reg_lock);
ret = t3_sge_cqcntxt_op(adapter, req2->id, req2->op,
req2->credits);
mtx_unlock(&adapter->sge.reg_lock);
break;
}
case RDMA_GET_MEM: {
struct ch_mem_range *t = data;
struct mc7 *mem;
if ((t->addr & 7) || (t->len & 7))
return (EINVAL);
if (t->mem_id == MEM_CM)
mem = &adapter->cm;
else if (t->mem_id == MEM_PMRX)
mem = &adapter->pmrx;
else if (t->mem_id == MEM_PMTX)
mem = &adapter->pmtx;
else
return (EINVAL);
ret = t3_mc7_bd_read(mem, t->addr/8, t->len/8, (u64 *)t->buf);
if (ret)
return (ret);
break;
}
case RDMA_CQ_SETUP: {
struct rdma_cq_setup *req2 = data;
mtx_lock(&adapter->sge.reg_lock);
ret = t3_sge_init_cqcntxt(adapter, req2->id, req2->base_addr,
req2->size, ASYNC_NOTIF_RSPQ,
req2->ovfl_mode, req2->credits,
req2->credit_thres);
mtx_unlock(&adapter->sge.reg_lock);
break;
}
case RDMA_CQ_DISABLE:
mtx_lock(&adapter->sge.reg_lock);
ret = t3_sge_disable_cqcntxt(adapter, *(unsigned int *)data);
mtx_unlock(&adapter->sge.reg_lock);
break;
	case RDMA_CTRL_QP_SETUP: {
		struct rdma_ctrlqp_setup *req2 = data;

		/* Program the RDMA control QP's egress context. */
		mtx_lock(&adapter->sge.reg_lock);
		ret = t3_sge_init_ecntxt(adapter, FW_RI_SGEEC_START, 0,
		    SGE_CNTXT_RDMA, ASYNC_NOTIF_RSPQ,
		    req2->base_addr, req2->size,
		    FW_RI_TID_START, 1, 0);
		mtx_unlock(&adapter->sge.reg_lock);
		break;
	}
	default:
		ret = (EOPNOTSUPP);
	}
	return ret;
}

/*
 * Process control requests from higher-level offload clients: iSCSI and
 * RDMA parameters, failover, and generic adapter queries.
 */
static int
cxgb_offload_ctl(struct toedev *tdev, unsigned int req, void *data)
{
	adapter_t *adapter = tdev2adap(tdev);
	struct tid_range *tid;
	struct mtutab *mtup;
	struct iff_mac *iffmacp;
	struct ddp_params *ddpp;
	struct adap_ports *ports;
	int port;

switch (req) {
case GET_MAX_OUTSTANDING_WR:
*(unsigned int *)data = FW_WR_NUM;
break;
case GET_WR_LEN:
*(unsigned int *)data = WR_FLITS;
break;
case GET_TX_MAX_CHUNK:
*(unsigned int *)data = 1 << 20; /* 1MB */
break;
case GET_TID_RANGE:
tid = data;
tid->num = t3_mc5_size(&adapter->mc5) -
adapter->params.mc5.nroutes -
adapter->params.mc5.nfilters -
adapter->params.mc5.nservers;
tid->base = 0;
break;
case GET_STID_RANGE:
tid = data;
tid->num = adapter->params.mc5.nservers;
tid->base = t3_mc5_size(&adapter->mc5) - tid->num -
adapter->params.mc5.nfilters -
adapter->params.mc5.nroutes;
break;
case GET_L2T_CAPACITY:
*(unsigned int *)data = 2048;
break;
case GET_MTUS:
mtup = data;
mtup->size = NMTUS;
mtup->mtus = adapter->params.mtus;
break;
case GET_IFF_FROM_MAC:
iffmacp = data;
iffmacp->dev = get_iff_from_mac(adapter, iffmacp->mac_addr,
iffmacp->vlan_tag & EVL_VLID_MASK);
break;
case GET_DDP_PARAMS:
ddpp = data;
ddpp->llimit = t3_read_reg(adapter, A_ULPRX_TDDP_LLIMIT);
ddpp->ulimit = t3_read_reg(adapter, A_ULPRX_TDDP_ULIMIT);
ddpp->tag_mask = t3_read_reg(adapter, A_ULPRX_TDDP_TAGMASK);
break;
case GET_PORTS:
ports = data;
ports->nports = adapter->params.nports;
for_each_port(adapter, port)
ports->lldevs[port] = adapter->port[port].ifp;
break;
case FAILOVER:
port = *(int *)data;
t3_port_failover(adapter, port);
failover_fixup(adapter, port);
break;
case FAILOVER_DONE:
port = *(int *)data;
t3_failover_done(adapter, port);
break;
case FAILOVER_CLEAR:
t3_failover_clear(adapter);
break;
case ULP_ISCSI_GET_PARAMS:
case ULP_ISCSI_SET_PARAMS:
if (!offload_running(adapter))
return (EAGAIN);
return cxgb_ulp_iscsi_ctl(adapter, req, data);
case RDMA_GET_PARAMS:
case RDMA_CQ_OP:
case RDMA_CQ_SETUP:
case RDMA_CQ_DISABLE:
case RDMA_CTRL_QP_SETUP:
case RDMA_GET_MEM:
if (!offload_running(adapter))
return (EAGAIN);
return cxgb_rdma_ctl(adapter, req, data);
default:
return (EOPNOTSUPP);
}
return 0;
}
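
/*
 * Example usage (sketch): clients size their connection tables with the
 * generic queries above, again through the toedev's control hook (the
 * ->ctl method name is assumed):
 *
 *	struct tid_range tr;
 *	struct adap_ports ap;
 *
 *	if (tdev->ctl(tdev, GET_TID_RANGE, &tr) == 0 &&
 *	    tdev->ctl(tdev, GET_PORTS, &ap) == 0)
 *		printf("%u TIDs at base %u across %u ports\n",
 *		    tr.num, tr.base, ap.nports);
 */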
/*
 * Dummy handler for Rx offload packets, used in case an offload packet
 * arrives before proper processing is set up.  It complains and drops the
 * packets, as it isn't normal to get offload packets at this stage.
 */
static int
rx_offload_blackhole(struct toedev *dev, struct mbuf **m, int n)
{
CH_ERR(tdev2adap(dev), "%d unexpected offload packets, first data 0x%x\n",
n, *mtod(m[0], uint32_t *));
while (n--)
m_freem(m[n]);
return 0;
}
/*
* Free a server TID and return it to the free pool.
*/
void
cxgb_free_stid(struct toedev *tdev, int stid)
{
	struct tid_info *t = &(TOE_DATA(tdev))->tid_maps;
	union listen_entry *p = stid2entry(t, stid);

	mtx_lock(&t->stid_lock);
	p->next = t->sfree;
	t->sfree = p;
	t->stids_in_use--;
	mtx_unlock(&t->stid_lock);
}

/* use ctx as a next pointer in the tid release list */
void
cxgb_queue_tid_release(struct toedev *tdev, unsigned int tid)
{
	struct toe_data *td = TOE_DATA(tdev);
	struct toe_tid_entry *p = &td->tid_maps.tid_tab[tid];

	mtx_lock(&td->tid_release_lock);
	p->ctx = td->tid_release_list;
	td->tid_release_list = p;
	if (!p->ctx)
		workqueue_enqueue(td->tid_release_task.wq, &td->tid_release_task.w, NULL);
	mtx_unlock(&td->tid_release_lock);
}
/*
* Remove a tid from the TID table. A client may defer processing its last
* CPL message if it is locked at the time it arrives, and while the message
* sits in the client's backlog the TID may be reused for another connection.
* To handle this we atomically switch the TID association if it still points
* to the original client context.
*/
void
cxgb_remove_tid(struct toedev *tdev, void *ctx, unsigned int tid)
{
struct tid_info *t = &(TOE_DATA(tdev))->tid_maps;
	/*
	 * This is a very delicate place.  report_unreachable() is a
	 * complicated routine and can, in particular, hit this same
	 * neighbour entry, so be careful to avoid an endless loop.  --ANK
	 */
while (neigh->nud_state == NUD_FAILED &&
(skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
write_unlock(&neigh->lock);
neigh->ops->error_report(neigh, skb);
write_lock(&neigh->lock);
}
skb_queue_purge(&neigh->arp_queue);
}
if (neigh->nud_state & NUD_IN_TIMER) {
if (time_before(next, jiffies + HZ/2))
next = jiffies + HZ/2;
if (!mod_timer(&neigh->timer, next))
neigh_hold(neigh);
}
if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
struct mbuf *m = skb_peek(&neigh->arp_queue);
#ifdef CONFIG_ARPD
if (notify && neigh->parms->app_probes)
neigh_app_notify(neigh);
#endif
neigh_release(neigh);
}
static int
arp_constructor_offload(struct neighbour *neigh)
{
if (neigh->ifp && is_offloading(neigh->ifp))
neigh->timer.function = neigh_timer_handler_offload;
return orig_arp_constructor(neigh);
}
/*
* This must match exactly the signature of neigh_update for jprobes to work.
* It runs from a trap handler with interrupts off so don't disable BH.
*/
static int
neigh_update_offload(struct neighbour *neigh, const u8 *lladdr,
u8 new, u32 flags)
{
write_lock(&neigh->lock);
cxgb_neigh_update(neigh);
write_unlock(&neigh->lock);
jprobe_return();
/* NOTREACHED */
return 0;
}
#else /* Module support */
static inline int
prepare_arp_with_t3core(void)
{
return 0;
}
static inline void
restore_arp_sans_t3core(void)
{}
#endif
#endif
/*
* Process a received packet with an unknown/unexpected CPL opcode.
*/
static int
do_bad_cpl(struct toedev *dev, struct mbuf *m)
{
log(LOG_ERR, "%s: received bad CPL command 0x%x\n", dev->name,
*mtod(m, uint32_t *));
return (CPL_RET_BUF_DONE | CPL_RET_BAD_MSG);
}
/*
* Handlers for each CPL opcode
*/
static cpl_handler_func cpl_handlers[NUM_CPL_CMDS];
/*
* Add a new handler to the CPL dispatch table. A NULL handler may be supplied
* to unregister an existing handler.
*/
void
t3_register_cpl_handler(unsigned int opcode, cpl_handler_func h)
{
if (opcode < NUM_CPL_CMDS)
cpl_handlers[opcode] = h ? h : do_bad_cpl;
else
log(LOG_ERR, "T3C: handler registration for "
"opcode %x failed\n", opcode);
}
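
/*
 * Example usage (sketch): a client installs one handler per CPL opcode it
 * cares about; do_act_establish below is a hypothetical handler.  Passing
 * NULL later reverts the opcode to do_bad_cpl:
 *
 *	t3_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
 *	t3_register_cpl_handler(CPL_ACT_ESTABLISH, NULL);
 */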
/*
* TOEDEV's receive method.
*/
int
process_rx(struct toedev *dev, struct mbuf **m, int n)
{
while (n--) {
struct mbuf *m0 = *m++;
unsigned int opcode = G_OPCODE(ntohl(m0->m_pkthdr.csum_data));
int ret = cpl_handlers[opcode] (dev, m0);
#if VALIDATE_TID
if (ret & CPL_RET_UNKNOWN_TID) {
union opcode_tid *p = cplhdr(m0);
log(LOG_ERR, "%s: CPL message (opcode %u) had "
"unknown TID %u\n", dev->name, opcode,
G_TID(ntohl(p->opcode_tid)));
}
#endif
if (ret & CPL_RET_BUF_DONE)
m_freem(m0);
}
return 0;
}
/*
 * Send an mbuf to a TOE driver's send method.  The call is made inside a
 * critical section so the send is not preempted.
 */
int
cxgb_ofld_send(struct toedev *dev, struct mbuf *m)
{
int r;
critical_enter();
r = dev->send(dev, m);
critical_exit();
return r;
}
/**
* cxgb_ofld_recv - process n received offload packets
* @dev: the offload device
* @m: an array of offload packets
* @n: the number of offload packets
*
* Process an array of ingress offload packets. Each packet is forwarded
* to any active network taps and then passed to the offload device's receive
* method. We optimize passing packets to the receive method by passing
* it the whole array at once except when there are active taps.
*/
int
cxgb_ofld_recv(struct toedev *dev, struct mbuf **m, int n)
{
#if defined(CONFIG_CHELSIO_T3)
	if (likely(!netdev_nit))
		return dev->recv(dev, skb, n);

	for ( ; n; n--, skb++) {
		skb[0]->dev = dev->lldev;
		dev_queue_xmit_nit(skb[0], dev->lldev);
		skb[0]->dev = NULL;
	}
	return dev->recv(dev, skb, n);
#else
	return dev->recv(dev, m, n);
#endif
}

/*
 * Handle a route being redirected from one interface to another: grab an
 * L2T entry for the new route and let each client with a connection on the
 * old route decide whether to switch its connection over to it.
 */
static void
cxgb_redirect(struct rtentry *old, struct rtentry *new)
{
	struct ifnet *olddev, *newdev;
	struct tid_info *ti;
	struct toedev *tdev;
	struct l2t_entry *e;
	struct toe_tid_entry *te;
	u32 tid;
	int update_tcb;

olddev = old->rt_ifp;
newdev = new->rt_ifp;
if (!is_offloading(olddev))
return;
if (!is_offloading(newdev)) {
		log(LOG_WARNING, "%s: Redirect to non-offload "
		    "device ignored.\n", __func__);
return;
}
tdev = TOEDEV(olddev);
BUG_ON(!tdev);
if (tdev != TOEDEV(newdev)) {
log(LOG_WARNING, "%s: Redirect to different "
"offload device ignored.\n", __func__);
return;
}
/* Add new L2T entry */
e = t3_l2t_get(tdev, new, ((struct port_info *)new->rt_ifp->if_softc)->port_id);
if (!e) {
log(LOG_ERR, "%s: couldn't allocate new l2t entry!\n",
__func__);
return;
}
/* Walk tid table and notify clients of dst change. */
ti = &(TOE_DATA(tdev))->tid_maps;
for (tid=0; tid < ti->ntids; tid++) {
te = lookup_tid(ti, tid);
BUG_ON(!te);
if (te->ctx && te->client && te->client->redirect) {
update_tcb = te->client->redirect(te->ctx, old, new,
e);
if (update_tcb) {
l2t_hold(L2DATA(tdev), e);
set_l2t_ix(tdev, tid, e);
}
}
}
l2t_release(L2DATA(tdev), e);
}
/*
 * Allocate a chunk of zeroed memory for the offload tables.
 */
void *
cxgb_alloc_mem(unsigned long size)
{
return malloc(size, M_DEVBUF, M_ZERO);
}
/*
* Allocate and initialize the TID tables. Returns 0 on success.
*/
static int
init_tid_tabs(struct tid_info *t, unsigned int ntids,
unsigned int natids, unsigned int nstids,
unsigned int atid_base, unsigned int stid_base)
{
unsigned long size = ntids * sizeof(*t->tid_tab) +
natids * sizeof(*t->atid_tab) + nstids * sizeof(*t->stid_tab);
t->tid_tab = cxgb_alloc_mem(size);
if (!t->tid_tab)
return (ENOMEM);