/******************************************************************************

  Copyright (c) 2001-2017, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/* Sysctl handlers */
static int ixv_sysctl_interrupt_rate_handler(SYSCTLFN_PROTO);
static int ixv_sysctl_next_to_check_handler(SYSCTLFN_PROTO);
static int ixv_sysctl_next_to_refresh_handler(SYSCTLFN_PROTO);
static int ixv_sysctl_rdh_handler(SYSCTLFN_PROTO);
static int ixv_sysctl_rdt_handler(SYSCTLFN_PROTO);
static int ixv_sysctl_tdt_handler(SYSCTLFN_PROTO);
static int ixv_sysctl_tdh_handler(SYSCTLFN_PROTO);
static int ixv_sysctl_tx_process_limit(SYSCTLFN_PROTO);
static int ixv_sysctl_rx_process_limit(SYSCTLFN_PROTO);
static int ixv_sysctl_rx_copy_len(SYSCTLFN_PROTO);
/* The MSI-X Interrupt handlers */
static int ixv_msix_que(void *);
static int ixv_msix_mbx(void *);
/* Number of Queues - do not exceed MSI-X vectors - 1 */
static int ixv_num_queues = 0;
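/*
 * TUNABLE_INT is a FreeBSD interface with no direct NetBSD
 * equivalent; it is defined as an empty macro here so that the
 * shared tunable declarations below compile as no-ops.
 */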
#define TUNABLE_INT(__x, __y)
TUNABLE_INT("hw.ixv.num_queues", &ixv_num_queues);
/*
 * AIM: Adaptive Interrupt Moderation.
 * The interrupt rate is varied over time based on
 * the traffic seen on that interrupt vector.
*/
static bool ixv_enable_aim = false;
TUNABLE_INT("hw.ixv.enable_aim", &ixv_enable_aim);
static int ixv_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
TUNABLE_INT("hw.ixv.max_interrupt_rate", &ixv_max_interrupt_rate);
/* How many packets rxeof tries to clean at a time */
static int ixv_rx_process_limit = 256;
TUNABLE_INT("hw.ixv.rx_process_limit", &ixv_rx_process_limit);
/* How many packets txeof tries to clean at a time */
static int ixv_tx_process_limit = 256;
TUNABLE_INT("hw.ixv.tx_process_limit", &ixv_tx_process_limit);
/* Whether packet processing uses a workqueue (true) or a softint (false) */
static bool ixv_txrx_workqueue = false;
/*
 * Number of TX descriptors per ring.
 * This is set higher than RX as it seems
 * to be the better performing choice.
*/
static int ixv_txd = DEFAULT_TXD;
TUNABLE_INT("hw.ixv.txd", &ixv_txd);
/* Number of RX descriptors per ring */
static int ixv_rxd = DEFAULT_RXD;
TUNABLE_INT("hw.ixv.rxd", &ixv_rxd);
#if 0
static int (*ixv_start_locked)(struct ifnet *, struct tx_ring *);
static int (*ixv_ring_empty)(struct ifnet *, struct buf_ring *);
#endif
/************************************************************************
* ixv_probe - Device identification routine
*
 * Determines if the driver should be loaded on
 * this adapter based on its PCI vendor/device ID.
 *
 * return nonzero on match, 0 otherwise
************************************************************************/
static int
ixv_probe(device_t dev, cfdata_t cf, void *aux)
{
#ifdef __HAVE_PCI_MSI_MSIX
const struct pci_attach_args *pa = aux;
/* Negotiate mailbox API version */
error = ixv_negotiate_api(sc);
if (error)
aprint_normal_dev(dev,
"MBX API negotiation failed during attach!\n");
switch (hw->api_version) {
case ixgbe_mbox_api_10:
apivstr = "1.0";
break;
case ixgbe_mbox_api_20:
apivstr = "2.0";
break;
case ixgbe_mbox_api_11:
apivstr = "1.1";
break;
case ixgbe_mbox_api_12:
apivstr = "1.2";
break;
case ixgbe_mbox_api_13:
apivstr = "1.3";
break;
case ixgbe_mbox_api_14:
apivstr = "1.4";
break;
case ixgbe_mbox_api_15:
apivstr = "1.5";
break;
default:
apivstr = "unknown";
break;
}
aprint_normal_dev(dev, "Mailbox API %s\n", apivstr);
	/* If no MAC address was assigned, make a random one */
	if (!ixv_check_ether_addr(hw->mac.addr)) {
		u8 addr[ETHER_ADDR_LEN];
		uint64_t rndval = cprng_strong64();

		memcpy(addr, &rndval, sizeof(addr));
		addr[0] &= 0xFE;	/* clear multicast bit */
		addr[0] |= 0x02;	/* set locally administered bit */
		memcpy(hw->mac.addr, addr, sizeof(addr));
	}
/* Register for VLAN events */
ether_set_vlan_cb(&sc->osdep.ec, ixv_vlan_cb);
/* Do descriptor calc and sanity checks */
if (((ixv_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
ixv_txd < MIN_TXD || ixv_txd > MAX_TXD) {
aprint_error_dev(dev, "Invalid TX ring size (%d). "
"It must be between %d and %d, "
"inclusive, and must be a multiple of %zu. "
"Using default value of %d instead.\n",
ixv_txd, MIN_TXD, MAX_TXD,
DBA_ALIGN / sizeof(union ixgbe_adv_tx_desc),
DEFAULT_TXD);
sc->num_tx_desc = DEFAULT_TXD;
} else
sc->num_tx_desc = ixv_txd;
if (((ixv_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
ixv_rxd < MIN_RXD || ixv_rxd > MAX_RXD) {
aprint_error_dev(dev, "Invalid RX ring size (%d). "
"It must be between %d and %d, "
"inclusive, and must be a multiple of %zu. "
"Using default value of %d instead.\n",
ixv_rxd, MIN_RXD, MAX_RXD,
DBA_ALIGN / sizeof(union ixgbe_adv_rx_desc),
DEFAULT_RXD);
sc->num_rx_desc = DEFAULT_RXD;
} else
sc->num_rx_desc = ixv_rxd;
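	/*
	 * Example (assuming DBA_ALIGN is 128 and the advanced
	 * descriptors are 16 bytes, as in the shared ixgbe code):
	 * each ring size must then be a multiple of 128 / 16 = 8
	 * descriptors, so e.g. 2052 would be rejected and the
	 * default used instead.
	 */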
/* Sysctls for limiting the amount of work done in the taskqueues */
sc->rx_process_limit
= (ixv_rx_process_limit <= sc->num_rx_desc)
? ixv_rx_process_limit : sc->num_rx_desc;
sc->tx_process_limit
= (ixv_tx_process_limit <= sc->num_tx_desc)
? ixv_tx_process_limit : sc->num_tx_desc;
	/* Set the default upper limit for copying small mbufs in rxeof */
sc->rx_copy_len = IXGBE_RX_COPY_LEN_MAX;
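	/*
	 * Frames no longer than rx_copy_len are copied into a fresh
	 * small mbuf in rxeof instead of passing the receive cluster
	 * up the stack (this mirrors the shared ixgbe rxeof behavior;
	 * see ixgbe_rxeof()).
	 */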
/* Check if VF was disabled by PF */
error = hw->mac.ops.get_link_state(hw, &sc->link_enabled);
if (error) {
/* PF is not capable of controlling VF state. Enable the link. */
sc->link_enabled = TRUE;
}
/* Do the stats setup */
ixv_init_stats(sc);
ixv_add_stats_sysctls(sc);
if (sc->feat_en & IXGBE_FEATURE_NETMAP)
ixgbe_netmap_attach(sc);
snprintb(buf, sizeof(buf), IXGBE_FEATURE_FLAGS, sc->feat_cap);
aprint_verbose_dev(dev, "feature cap %s\n", buf);
snprintb(buf, sizeof(buf), IXGBE_FEATURE_FLAGS, sc->feat_en);
aprint_verbose_dev(dev, "feature ena %s\n", buf);
/************************************************************************
* ixv_detach - Device removal routine
*
* Called when the driver is being removed.
* Stops the adapter and deallocates all the resources
* that were allocated for driver operation.
*
* return 0 on success, positive on failure
************************************************************************/
static int
ixv_detach(device_t dev, int flags)
{
struct ixgbe_softc *sc = device_private(dev);
struct ixgbe_hw *hw = &sc->hw;
struct tx_ring *txr = sc->tx_rings;
struct rx_ring *rxr = sc->rx_rings;
struct ixgbevf_hw_stats *stats = &sc->stats.vf;
INIT_DEBUGOUT("ixv_detach: begin");
if (sc->osdep.attached == false)
return 0;
/* Stop the interface. Callouts are stopped in it. */
ixv_ifstop(sc->ifp, 1);
if (VLAN_ATTACHED(&sc->osdep.ec) &&
(flags & (DETACH_SHUTDOWN | DETACH_FORCE)) == 0) {
aprint_error_dev(dev, "VLANs in use, detach first\n");
return EBUSY;
}
if (sc->feat_en & IXGBE_FEATURE_NETMAP)
netmap_detach(sc->ifp);
ixv_free_pci_resources(sc);
#if 0 /* XXX the NetBSD port is probably missing something here */
bus_generic_detach(dev);
#endif
if_detach(sc->ifp);
ifmedia_fini(&sc->media);
if_percpuq_destroy(sc->ipq);
/************************************************************************
* ixv_init_locked - Init entry point
*
 * Used in two ways: by the stack as an init entry point
 * in the network interface structure, and by the driver
 * as a hw/sw initialization routine to bring the adapter
 * to a consistent state.
************************************************************************/
static void
ixv_init_locked(struct ixgbe_softc *sc)
{
struct ifnet *ifp = sc->ifp;
device_t dev = sc->dev;
struct ixgbe_hw *hw = &sc->hw;
struct ix_queue *que;
int error = 0;
uint32_t mask;
int i;
INIT_DEBUGOUT("ixv_init_locked: begin");
KASSERT(mutex_owned(&sc->core_mtx));
hw->adapter_stopped = FALSE;
hw->mac.ops.stop_adapter(hw);
callout_stop(&sc->timer);
for (i = 0, que = sc->queues; i < sc->num_queues; i++, que++)
que->disabled_count = 0;
/* reprogram the RAR[0] in case user changed it. */
hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
/* Get the latest mac address, User can use a LAA */
memcpy(hw->mac.addr, CLLADDR(ifp->if_sadl),
IXGBE_ETH_LENGTH_OF_ADDRESS);
hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, 1);
/* Prepare transmit descriptors and buffers */
if (ixgbe_setup_transmit_structures(sc)) {
aprint_error_dev(dev, "Could not setup transmit structures\n");
ixv_stop_locked(sc);
return;
}
/* Reset VF and renegotiate mailbox API version */
hw->mac.ops.reset_hw(hw);
hw->mac.ops.start_hw(hw);
error = ixv_negotiate_api(sc);
if (error)
device_printf(dev,
"Mailbox API negotiation failed in init_locked!\n");
ixv_initialize_transmit_units(sc);
/* Setup Multicast table */
ixv_set_rxfilter(sc);
/* Use fixed buffer size, even for jumbo frames */
sc->rx_mbuf_sz = MCLBYTES;
/* Prepare receive descriptors and buffers */
error = ixgbe_setup_receive_structures(sc);
if (error) {
device_printf(dev,
"Could not setup receive structures (err = %d)\n", error);
ixv_stop_locked(sc);
return;
}
/* Set up VLAN offload and filter */
ixv_setup_vlan_support(sc);
/* Set up MSI-X routing */
ixv_configure_ivars(sc);
/* Set up auto-mask */
mask = (1 << sc->vector);
for (i = 0, que = sc->queues; i < sc->num_queues; i++, que++)
mask |= (1 << que->msix);
IXGBE_WRITE_REG(hw, IXGBE_VTEIAM, mask);
/* Set moderation on the Link interrupt */
ixv_eitr_write(sc, sc->vector, IXGBE_LINK_ITR);
/* Stats init */
ixv_init_stats(sc);
/* Config/Enable Link */
error = hw->mac.ops.get_link_state(hw, &sc->link_enabled);
if (error) {
/* PF is not capable of controlling VF state. Enable the link. */
sc->link_enabled = TRUE;
} else if (sc->link_enabled == FALSE)
device_printf(dev, "VF is disabled by PF\n");
if (sc->enable_aim == false)
goto no_calc;
/*
* Do Adaptive Interrupt Moderation:
* - Write out last calculated setting
* - Calculate based on average size over
* the last interval.
*/
if (que->eitr_setting)
ixv_eitr_write(sc, que->msix, que->eitr_setting);
que->eitr_setting = 0;
/* Idle, do nothing */
if ((txr->bytes == 0) && (rxr->bytes == 0))
goto no_calc;
if ((txr->bytes) && (txr->packets))
newitr = txr->bytes/txr->packets;
if ((rxr->bytes) && (rxr->packets))
newitr = uimax(newitr, (rxr->bytes / rxr->packets));
newitr += 24; /* account for hardware frame, crc */
/* set an upper boundary */
newitr = uimin(newitr, 3000);
/* Be nice to the mid range */
if ((newitr > 300) && (newitr < 1200))
newitr = (newitr / 3);
else
newitr = (newitr / 2);
	/*
	 * When RSC is used, the ITR interval must be larger than RSC_DELAY
	 * (currently 2us). The minimum ITR interval is always greater than
	 * 2us at 100M (and presumably at 10M, though that is undocumented),
	 * but not at 1G and higher speeds.
	 */
if ((sc->link_speed != IXGBE_LINK_SPEED_100_FULL)
&& (sc->link_speed != IXGBE_LINK_SPEED_10_FULL)) {
if (newitr < IXGBE_MIN_RSC_EITR_10G1G)
newitr = IXGBE_MIN_RSC_EITR_10G1G;
}
/* save for next interrupt */
que->eitr_setting = newitr;
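	/*
	 * Worked example of the calculation above: txr->bytes = 1522000
	 * and txr->packets = 1000 give newitr = 1522 + 24 = 1546; that
	 * is under the 3000 cap and outside the (300, 1200) mid range,
	 * so the final value is 1546 / 2 = 773.
	 */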
/************************************************************************
* ixv_media_status - Media Ioctl callback
*
* Called whenever the user queries the status of
* the interface using ifconfig.
************************************************************************/
static void
ixv_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
struct ixgbe_softc *sc = ifp->if_softc;
/************************************************************************
* ixv_media_change - Media Ioctl callback
*
 * Called when the user changes speed/duplex using
 * the media/mediaopt options with ifconfig.
************************************************************************/
static int
ixv_media_change(struct ifnet *ifp)
{
struct ixgbe_softc *sc = ifp->if_softc;
struct ifmedia *ifm = &sc->media;
INIT_DEBUGOUT("ixv_media_change: begin");
if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
return (EINVAL);
switch (IFM_SUBTYPE(ifm->ifm_media)) {
case IFM_AUTO:
break;
default:
device_printf(sc->dev, "Only auto media type\n");
return (EINVAL);
}
return (0);
} /* ixv_media_change */
static void
ixv_schedule_admin_tasklet(struct ixgbe_softc *sc)
{
if (sc->schedule_wqs_ok) {
if (atomic_cas_uint(&sc->admin_pending, 0, 1) == 0)
workqueue_enqueue(sc->admin_wq,
&sc->admin_wc, NULL);
}
}
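/*
 * The atomic_cas_uint() guard above flips admin_pending from 0 to 1
 * exactly once, so the admin work is enqueued at most once until the
 * handler clears the flag; concurrent callers lose the race and
 * simply return.
 */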
/************************************************************************
* ixv_negotiate_api
*
* Negotiate the Mailbox API with the PF;
* start with the most featured API first.
************************************************************************/
static int
ixv_negotiate_api(struct ixgbe_softc *sc)
{
struct ixgbe_hw *hw = &sc->hw;
int mbx_api[] = { ixgbe_mbox_api_15,
ixgbe_mbox_api_13,
ixgbe_mbox_api_12,
ixgbe_mbox_api_11,
ixgbe_mbox_api_10,
ixgbe_mbox_api_unknown };
int i = 0;
	while (mbx_api[i] != ixgbe_mbox_api_unknown) {
		if (ixgbevf_negotiate_api_version(hw, mbx_api[i]) == 0) {
			if (hw->api_version >= ixgbe_mbox_api_15)
				ixgbe_upgrade_mbx_params_vf(hw);
			return (0);
		}
		i++;
	}

	/* No mutually supported API version was found */
	return (EINVAL);
} /* ixv_negotiate_api */
/************************************************************************
* ixv_mc_array_itr
*
* An iterator function needed by the multicast shared code.
* It feeds the shared code routine the addresses in the
* array of ixv_set_rxfilter() one by one.
************************************************************************/
static u8 *
ixv_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
{
struct ixgbe_mc_addr *mta;
mta = (struct ixgbe_mc_addr *)*update_ptr;
*vmdq = 0;
*update_ptr = (u8*)(mta + 1);
return (mta->addr);
} /* ixv_mc_array_itr */
/************************************************************************
* ixv_local_timer - Timer routine
*
* Checks for link status, updates statistics,
* and runs the watchdog check.
************************************************************************/
static void
ixv_local_timer(void *arg)
{
struct ixgbe_softc *sc = arg;
if (sc->schedule_wqs_ok) {
if (atomic_cas_uint(&sc->timer_pending, 0, 1) == 0)
workqueue_enqueue(sc->timer_wq,
&sc->timer_wc, NULL);
}
}
/*
* Check the TX queues status
* - mark hung queues so we don't schedule on them
* - watchdog only if all queues show hung
*/
que = sc->queues;
for (i = 0; i < sc->num_queues; i++, que++) {
/* Keep track of queues with work for soft irq */
if (que->txr->busy)
queues |= ((u64)1 << que->me);
		/*
		 * Each time txeof runs without cleaning while there
		 * are uncleaned descriptors, it increments busy. If
		 * busy reaches the MAX, we declare the queue hung.
		 */
if (que->busy == IXGBE_QUEUE_HUNG) {
++hung;
/* Mark the queue as inactive */
sc->active_queues &= ~((u64)1 << que->me);
continue;
} else {
/* Check if we've come back from hung */
if ((sc->active_queues & ((u64)1 << que->me)) == 0)
sc->active_queues |= ((u64)1 << que->me);
}
if (que->busy >= IXGBE_MAX_TX_BUSY) {
device_printf(dev,
"Warning queue %d appears to be hung!\n", i);
que->txr->busy = IXGBE_QUEUE_HUNG;
++hung;
}
}
/* Only truly watchdog if all queues show hung */
if (hung == sc->num_queues)
goto watchdog;
#if 0
else if (queues != 0) { /* Force an IRQ on queues with work */
ixv_rearm_queues(sc, queues);
}
#endif
/************************************************************************
* ixv_update_link_status - Update OS on link state
*
* Note: Only updates the OS on the cached link state.
* The real check of the hardware only happens with
* a link interrupt.
************************************************************************/
static void
ixv_update_link_status(struct ixgbe_softc *sc)
{
struct ifnet *ifp = sc->ifp;
device_t dev = sc->dev;
KASSERT(mutex_owned(&sc->core_mtx));
if (sc->link_up && sc->link_enabled) {
if (sc->link_active != LINK_STATE_UP) {
if (bootverbose) {
const char *bpsmsg;
switch (sc->link_speed) {
case IXGBE_LINK_SPEED_10GB_FULL:
bpsmsg = "10 Gbps";
break;
case IXGBE_LINK_SPEED_5GB_FULL:
bpsmsg = "5 Gbps";
break;
case IXGBE_LINK_SPEED_2_5GB_FULL:
bpsmsg = "2.5 Gbps";
break;
case IXGBE_LINK_SPEED_1GB_FULL:
bpsmsg = "1 Gbps";
break;
case IXGBE_LINK_SPEED_100_FULL:
bpsmsg = "100 Mbps";
break;
case IXGBE_LINK_SPEED_10_FULL:
bpsmsg = "10 Mbps";
break;
default:
bpsmsg = "unknown speed";
break;
}
device_printf(dev, "Link is up %s %s \n",
bpsmsg, "Full Duplex");
}
sc->link_active = LINK_STATE_UP;
if_link_state_change(ifp, LINK_STATE_UP);
}
} else {
/*
		 * Act only when the link state changes to DOWN, i.e.
* a) LINK_STATE_UNKNOWN -> LINK_STATE_DOWN
* b) LINK_STATE_UP -> LINK_STATE_DOWN
*/
if (sc->link_active != LINK_STATE_DOWN) {
if (bootverbose)
device_printf(dev, "Link is Down\n");
if_link_state_change(ifp, LINK_STATE_DOWN);
sc->link_active = LINK_STATE_DOWN;
}
}
} /* ixv_update_link_status */
/************************************************************************
 * ixv_ifstop - Stop the hardware
*
* Disables all traffic on the adapter by issuing a
* global reset on the MAC and deallocates TX/RX buffers.
************************************************************************/
static void
ixv_ifstop(struct ifnet *ifp, int disable)
{
struct ixgbe_softc *sc = ifp->if_softc;
for (i = 0; i < sc->num_queues; i++, que++, txr++) {
if (!(sc->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
if (txr->txr_si != NULL)
softint_disestablish(txr->txr_si);
}
if (que->que_si != NULL)
softint_disestablish(que->que_si);
}
if (sc->txr_wq != NULL)
workqueue_destroy(sc->txr_wq);
if (sc->txr_wq_enqueued != NULL)
percpu_free(sc->txr_wq_enqueued, sizeof(u_int));
if (sc->que_wq != NULL)
workqueue_destroy(sc->que_wq);
	/* Drain the Mailbox (link) queue */
if (sc->admin_wq != NULL) {
workqueue_destroy(sc->admin_wq);
sc->admin_wq = NULL;
}
if (sc->timer_wq != NULL) {
workqueue_destroy(sc->timer_wq);
sc->timer_wq = NULL;
}
} /* ixv_free_deferred_handlers */
/* Don't enable LRO by default */
#if 0
/* NetBSD doesn't support LRO yet */
ifp->if_capabilities |= IFCAP_LRO;
#endif
/*
* Specify the media types supported by this adapter and register
* callbacks to update media and link information
*/
ec->ec_ifmedia = &sc->media;
ifmedia_init_with_lock(&sc->media, IFM_IMASK, ixv_media_change,
ixv_media_status, &sc->core_mtx);
ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
/************************************************************************
* ixv_initialize_rss_mapping
************************************************************************/
static void
ixv_initialize_rss_mapping(struct ixgbe_softc *sc)
{
struct ixgbe_hw *hw = &sc->hw;
u32 reta = 0, mrqc, rss_key[10];
int queue_id;
int i, j;
u32 rss_hash_config;
	/* Force use of the default RSS key. */
#ifdef __NetBSD__
rss_getkey((uint8_t *) &rss_key);
#else
if (sc->feat_en & IXGBE_FEATURE_RSS) {
/* Fetch the configured RSS key */
rss_getkey((uint8_t *)&rss_key);
} else {
/* set up random bits */
cprng_fast(&rss_key, sizeof(rss_key));
}
#endif
/* Now fill out hash function seeds */
for (i = 0; i < 10; i++)
IXGBE_WRITE_REG(hw, IXGBE_VFRSSRK(i), rss_key[i]);
/* Set up the redirection table */
for (i = 0, j = 0; i < 64; i++, j++) {
if (j == sc->num_queues)
j = 0;
if (sc->feat_en & IXGBE_FEATURE_RSS) {
/*
* Fetch the RSS bucket id for the given indirection
* entry. Cap it at the number of configured buckets
* (which is num_queues.)
*/
queue_id = rss_get_indirection_to_bucket(i);
queue_id = queue_id % sc->num_queues;
} else
queue_id = j;
/*
* The low 8 bits are for hash value (n+0);
* The next 8 bits are for hash value (n+1), etc.
*/
reta >>= 8;
reta |= ((uint32_t)queue_id) << 24;
if ((i & 3) == 3) {
IXGBE_WRITE_REG(hw, IXGBE_VFRETA(i >> 2), reta);
reta = 0;
}
}
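	/*
	 * Example: with two queues the queue_id sequence is 0,1,0,1,...
	 * so each group of four entries packs to 0x01000100 (entry n in
	 * byte n & 3) and that value is written to all 16 VFRETA
	 * registers.
	 */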
/* Perform hash on these packet types */
if (sc->feat_en & IXGBE_FEATURE_RSS)
rss_hash_config = rss_gethashconfig();
else {
/*
* Disable UDP - IP fragments aren't currently being handled
* and so we end up with a mix of 2-tuple and 4-tuple
* traffic.
*/
rss_hash_config = RSS_HASHTYPE_RSS_IPV4
| RSS_HASHTYPE_RSS_TCP_IPV4
| RSS_HASHTYPE_RSS_IPV6
| RSS_HASHTYPE_RSS_TCP_IPV6;
}
mrqc = IXGBE_MRQC_RSSEN;
if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4)
mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4)
mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6)
mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX)
device_printf(sc->dev, "%s: RSS_HASHTYPE_RSS_IPV6_EX "
"defined, but not supported\n", __func__);
if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX)
device_printf(sc->dev, "%s: RSS_HASHTYPE_RSS_TCP_IPV6_EX "
"defined, but not supported\n", __func__);
if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4)
mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX)
device_printf(sc->dev, "%s: RSS_HASHTYPE_RSS_UDP_IPV6_EX "
"defined, but not supported\n", __func__);
IXGBE_WRITE_REG(hw, IXGBE_VFMRQC, mrqc);
} /* ixv_initialize_rss_mapping */
/* Tell PF our max_frame size */
if (ixgbevf_rlpml_set_vf(hw, sc->max_frame_size) != 0) {
device_printf(sc->dev, "There is a problem with the PF "
"setup. It is likely the receive unit for this VF will "
"not function correctly.\n");
}
for (int i = 0; i < sc->num_queues; i++, rxr++) {
u64 rdba = rxr->rxdma.dma_paddr;
u32 reg, rxdctl;
int j = rxr->me;
/* Disable the queue */
rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(j));
rxdctl &= ~IXGBE_RXDCTL_ENABLE;
IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(j), rxdctl);
for (int k = 0; k < 10; k++) {
if (IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(j)) &
IXGBE_RXDCTL_ENABLE)
msec_delay(1);
else
break;
}
IXGBE_WRITE_BARRIER(hw);
/* Setup the Base and Length of the Rx Descriptor Ring */
IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(j),
(rdba & 0x00000000ffffffffULL));
IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(j), (rdba >> 32));
IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(j),
sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
/* Reset the ring indices */
IXGBE_WRITE_REG(hw, IXGBE_VFRDH(rxr->me), 0);
IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), 0);
/* Set up the SRRCTL register */
reg = IXGBE_READ_REG(hw, IXGBE_VFSRRCTL(j));
reg &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
reg &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
reg |= bufsz;
reg |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(j), reg);
/* Capture Rx Tail index */
rxr->tail = IXGBE_VFRDT(rxr->me);
/* Do the queue enabling last */
rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME;
IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(j), rxdctl);
for (int k = 0; k < 10; k++) {
if (IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(j)) &
IXGBE_RXDCTL_ENABLE)
break;
msec_delay(1);
}
IXGBE_WRITE_BARRIER(hw);
/* Set the Tail Pointer */
#ifdef DEV_NETMAP
/*
* In netmap mode, we must preserve the buffers made
* available to userspace before the if_init()
* (this is true by default on the TX side, because
* init makes all buffers available to userspace).
*
* netmap_reset() and the device specific routines
* (e.g. ixgbe_setup_receive_rings()) map these
* buffers at the end of the NIC ring, so here we
* must set the RDT (tail) register to make sure
* they are not overwritten.
*
* In this driver the NIC ring starts at RDH = 0,
* RDT points to the last slot available for reception (?),
* so RDT = num_rx_desc - 1 means the whole ring is available.
*/
if ((sc->feat_en & IXGBE_FEATURE_NETMAP) &&
(ifp->if_capenable & IFCAP_NETMAP)) {
struct netmap_adapter *na = NA(sc->ifp);
struct netmap_kring *kring = na->rx_rings[i];
int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);
/* Enable HW tagging only if any vlan is attached */
hwtagging = (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING)
&& VLAN_ATTACHED(ec);
/* Enable the queues */
for (i = 0; i < sc->num_queues; i++) {
rxr = &sc->rx_rings[i];
ctrl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(rxr->me));
if (hwtagging)
ctrl |= IXGBE_RXDCTL_VME;
else
ctrl &= ~IXGBE_RXDCTL_VME;
IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(rxr->me), ctrl);
/*
* Let Rx path know that it needs to store VLAN tag
* as part of extra mbuf info.
*/
rxr->vtag_strip = hwtagging ? TRUE : FALSE;
}
} /* ixv_setup_vlan_tagging */
/*
	 * A soft reset zeroes out the VFTA, so
* we need to repopulate it now.
*/
for (int i = 0; i < IXGBE_VFTA_SIZE; i++) {
if (sc->shadow_vfta[i] == 0)
continue;
vfta = sc->shadow_vfta[i];
/*
		 * Reconstruct the VLAN IDs from the bits
		 * set in each of the array words.
*/
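		/*
		 * Example: bit 5 set in shadow_vfta[3] yields
		 * vid = 3 * 32 + 5 = 101.
		 */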
for (int j = 0; j < 32; j++) {
retry = 0;
if ((vfta & ((u32)1 << j)) == 0)
continue;
vid = (i * 32) + j;
/* Call the shared code mailbox routine */
while ((rv = hw->mac.ops.set_vfta(hw, vid, 0, TRUE,
FALSE)) != 0) {
if (++retry > 5) {
device_printf(sc->dev,
"%s: max retry exceeded\n",
__func__);
break;
}
}
if (rv != 0) {
device_printf(sc->dev,
"failed to set vlan %d\n", vid);
error = EACCES;
}
}
}
return error;
} /* ixv_setup_vlan_support */
/*
 * Control VLAN HW tagging when ec_nvlans changes from 1 to 0
 * or from 0 to 1.
*/
if ((set && (ec->ec_nvlans == 1)) || (!set && (ec->ec_nvlans == 0)))
ixv_setup_vlan_tagging(sc);
return rv;
}
/************************************************************************
* ixv_register_vlan
*
* Run via a vlan config EVENT, it enables us to use the
* HW Filter table since we can get the vlan id. This just
* creates the entry in the soft version of the VFTA, init
* will repopulate the real table.
************************************************************************/
static int
ixv_register_vlan(struct ixgbe_softc *sc, u16 vtag)
{
struct ixgbe_hw *hw = &sc->hw;
u16 index, bit;
int error;
/* For VTEIAC */
mask = (1 << sc->vector);
for (i = 0; i < sc->num_queues; i++, que++)
mask |= (1 << que->msix);
IXGBE_WRITE_REG(hw, IXGBE_VTEIAC, mask);
/* For VTEIMS */
IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, (1 << sc->vector));
que = sc->queues;
for (i = 0; i < sc->num_queues; i++, que++)
ixv_enable_queue(sc, que->msix);
/************************************************************************
* ixv_set_ivar
*
* Setup the correct IVAR register for a particular MSI-X interrupt
* - entry is the register array entry
* - vector is the MSI-X vector for this queue
* - type is RX/TX/MISC
************************************************************************/
static void
ixv_set_ivar(struct ixgbe_softc *sc, u8 entry, u8 vector, s8 type)
{
struct ixgbe_hw *hw = &sc->hw;
u32 ivar, index;
vector |= IXGBE_IVAR_ALLOC_VAL;
if (type == -1) { /* MISC IVAR */
ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR_MISC);
ivar &= ~0xFF;
ivar |= vector;
IXGBE_WRITE_REG(hw, IXGBE_VTIVAR_MISC, ivar);
} else { /* RX/TX IVARS */
index = (16 * (entry & 1)) + (8 * type);
ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR(entry >> 1));
ivar &= ~(0xffUL << index);
ivar |= ((u32)vector << index);
IXGBE_WRITE_REG(hw, IXGBE_VTIVAR(entry >> 1), ivar);
}
} /* ixv_set_ivar */
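/*
 * Worked example: entry = 3, type = 1 (TX) gives index = 16 * (3 & 1)
 * + 8 * 1 = 24, so the vector (with IXGBE_IVAR_ALLOC_VAL set) lands in
 * bits 31:24 of IXGBE_VTIVAR(3 >> 1) = IXGBE_VTIVAR(1).
 */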
/* XXX We should sync EITR value calculation with ixgbe.c? */
for (int i = 0; i < sc->num_queues; i++, que++) {
/* First the RX queue entry */
ixv_set_ivar(sc, i, que->msix, 0);
/* ... and the TX */
ixv_set_ivar(sc, i, que->msix, 1);
/* Set an initial value in EITR */
ixv_eitr_write(sc, que->msix, IXGBE_EITR_DEFAULT);
}
/* For the mailbox interrupt */
ixv_set_ivar(sc, 1, sc->vector, -1);
} /* ixv_configure_ivars */
/************************************************************************
* ixv_init_stats
*
* The VF stats registers never have a truly virgin
 * starting point, so this routine saves the initial values to
* last_<REGNAME>.
************************************************************************/
static void
ixv_init_stats(struct ixgbe_softc *sc)
{
struct ixgbe_hw *hw = &sc->hw;
/************************************************************************
* ixv_print_debug_info
*
* Provides a way to take a look at important statistics
* maintained by the driver and hardware.
************************************************************************/
static void
ixv_print_debug_info(struct ixgbe_softc *sc)
{
device_t dev = sc->dev;
struct ixgbe_hw *hw = &sc->hw;
int i;
/* A tad short on feature flags for VFs, atm. */
switch (sc->hw.mac.type) {
case ixgbe_mac_82599_vf:
break;
case ixgbe_mac_X540_vf:
break;
case ixgbe_mac_X550_vf:
case ixgbe_mac_X550EM_x_vf:
case ixgbe_mac_X550EM_a_vf:
sc->feat_cap |= IXGBE_FEATURE_NEEDS_CTXD;
break;
default:
break;
}
/* Enabled by default... */
/* Is a virtual function (VF) */
if (sc->feat_cap & IXGBE_FEATURE_VF)
sc->feat_en |= IXGBE_FEATURE_VF;
/* Netmap */
if (sc->feat_cap & IXGBE_FEATURE_NETMAP)
sc->feat_en |= IXGBE_FEATURE_NETMAP;
/* Receive-Side Scaling (RSS) */
if (sc->feat_cap & IXGBE_FEATURE_RSS)
sc->feat_en |= IXGBE_FEATURE_RSS;
/* Needs advanced context descriptor regardless of offloads req'd */
if (sc->feat_cap & IXGBE_FEATURE_NEEDS_CTXD)
sc->feat_en |= IXGBE_FEATURE_NEEDS_CTXD;
/*
	 * Check the number of multicast addresses. If it exceeds the limit,
* return ENOSPC.
* Update this code when we support API 1.3.
*/
ETHER_LOCK(ec);
ETHER_FIRST_MULTI(step, ec, enm);
while (enm != NULL) {
mcnt++;
/*
		 * This check runs before the address is added, so
		 * at least one free slot must remain.
*/
if (mcnt > (IXGBE_MAX_VF_MC - 1)) {
overflow = true;
break;
}
ETHER_NEXT_MULTI(step, enm);
}
ETHER_UNLOCK(ec);
error = 0;
if (overflow && ((ec->ec_flags & ETHER_F_ALLMULTI) == 0)) {
error = hw->mac.ops.update_xcast_mode(hw,
IXGBEVF_XCAST_MODE_ALLMULTI);
if (error == IXGBE_ERR_NOT_TRUSTED) {
device_printf(sc->dev,
"this interface is not trusted\n");
error = EPERM;
} else if (error == IXGBE_ERR_FEATURE_NOT_SUPPORTED) {
device_printf(sc->dev,
"the PF doesn't support allmulti mode\n");
error = EOPNOTSUPP;
} else if (error) {
device_printf(sc->dev,
"number of Ethernet multicast addresses "
"exceeds the limit (%d). error = %d\n",
IXGBE_MAX_VF_MC, error);
error = ENOSPC;
} else
ec->ec_flags |= ETHER_F_ALLMULTI;
}
if (error)
return error;
}
/*FALLTHROUGH*/
case SIOCDELMULTI:
IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
break;
case SIOCSIFMEDIA:
case SIOCGIFMEDIA:
IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
break;
case SIOCSIFCAP:
IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
break;
case SIOCSIFMTU:
IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
break;
case SIOCZIFDATA:
IOCTL_DEBUGOUT("ioctl: SIOCZIFDATA (Zero counter)");
ixv_update_stats(sc);
ixv_clear_evcnt(sc);
break;
default:
IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)", (int)command);
break;
}
switch (command) {
case SIOCSIFCAP:
/* Layer-4 Rx checksum offload has to be turned on and
* off as a unit.
*/
l4csum_en = ifcr->ifcr_capenable & l4csum;
if (l4csum_en != l4csum && l4csum_en != 0)
return EINVAL;
/*FALLTHROUGH*/
case SIOCADDMULTI:
case SIOCDELMULTI:
case SIOCSIFFLAGS:
case SIOCSIFMTU:
default:
if ((error = ether_ioctl(ifp, command, data)) != ENETRESET)
return error;
if ((ifp->if_flags & IFF_RUNNING) == 0)
;
else if (command == SIOCSIFCAP || command == SIOCSIFMTU) {
IXGBE_CORE_LOCK(sc);
ixv_init_locked(sc);
IXGBE_CORE_UNLOCK(sc);
} else if (command == SIOCADDMULTI || command == SIOCDELMULTI) {
/*
* Multicast list has changed; set the hardware filter
* accordingly.
*/
IXGBE_CORE_LOCK(sc);
ixv_disable_intr(sc);
ixv_set_rxfilter(sc);
ixv_enable_intr(sc);
IXGBE_CORE_UNLOCK(sc);
}
return 0;
}
} /* ixv_ioctl */
if (ifp->if_flags & IFF_RUNNING) {
IXGBE_TX_LOCK(txr);
more = ixgbe_txeof(txr);
if (!(sc->feat_en & IXGBE_FEATURE_LEGACY_TX))
if (!ixgbe_mq_ring_empty(ifp, txr->txr_interq))
ixgbe_mq_start_locked(ifp, txr);
/* Only for queue 0 */
/* NetBSD still needs this for CBQ */
if ((&sc->queues[0] == que)
&& (!ixgbe_legacy_ring_empty(ifp, NULL)))
ixgbe_legacy_start_locked(ifp, txr);
IXGBE_TX_UNLOCK(txr);
more |= ixgbe_rxeof(que);
if (more) {
IXGBE_EVC_ADD(&que->req, 1);
if (sc->txrx_use_workqueue) {
/*
* "enqueued flag" is not required here
* the same as ixg(4). See ixgbe_msix_que().
*/
workqueue_enqueue(sc->que_wq,
&que->wq_cookie, curcpu());
} else
softint_schedule(que->que_si);
return;
}
}
/* Re-enable this interrupt */
ixv_enable_queue(sc, que->msix);
/*
* Due to a broken design QEMU will fail to properly
* enable the guest for MSI-X unless the vectors in
* the table are all set up, so we must rewrite the
* ENABLE in the MSI-X control register again at this
* point to cause it to successfully initialize us.
*/
if (sc->hw.mac.type == ixgbe_mac_82599_vf) {
pci_get_capability(pc, tag, PCI_CAP_MSIX, &rid, NULL);
rid += PCI_MSIX_CTL;
msix_ctrl = pci_conf_read(pc, tag, rid);
msix_ctrl |= PCI_MSIX_CTL_ENABLE;
pci_conf_write(pc, tag, rid, msix_ctrl);
}
/************************************************************************
* ixv_configure_interrupts - Setup MSI-X resources
*
* Note: The VF device MUST use MSI-X, there is no fallback.
************************************************************************/
static int
ixv_configure_interrupts(struct ixgbe_softc *sc)
{
device_t dev = sc->dev;
int want, queues, msgs;
/* Must have at least 2 MSI-X vectors */
msgs = pci_msix_count(sc->osdep.pc, sc->osdep.tag);
if (msgs < 2) {
aprint_error_dev(dev, "MSIX config error\n");
return (ENXIO);
}
msgs = MIN(msgs, IXG_MAX_NINTR);
/* Figure out a reasonable auto config value */
queues = (ncpu > (msgs - 1)) ? (msgs - 1) : ncpu;
if (ixv_num_queues != 0)
queues = ixv_num_queues;
	else if (queues > IXGBE_VF_MAX_TX_QUEUES)
		queues = IXGBE_VF_MAX_TX_QUEUES;
/*
* Want vectors for the queues,
* plus an additional for mailbox.
*/
want = queues + 1;
if (msgs >= want)
msgs = want;
else {
aprint_error_dev(dev,
"MSI-X Configuration Problem, "
"%d vectors but %d queues wanted!\n", msgs, want);
return -1;
}
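	/*
	 * Worked example: with ncpu = 8 and msgs = 3 vectors available,
	 * queues = min(8, 3 - 1) = 2 and want = 2 + 1 = 3 (one vector
	 * per queue plus one for the mailbox), which fits. Forcing
	 * hw.ixv.num_queues = 4 with only 3 vectors would make
	 * want = 5 and fail.
	 */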
/************************************************************************
* ixv_check_link - Used in the local timer to poll for link changes
************************************************************************/
static s32
ixv_check_link(struct ixgbe_softc *sc)
{
s32 error;