/*
 * Copyright (c) 2001-2017, Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
* Copyright (c) 2011 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Coyote Point Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/* Sysctl handlers */
static int ixgbe_sysctl_flowcntl(SYSCTLFN_PROTO);
static int ixgbe_sysctl_advertise(SYSCTLFN_PROTO);
static int ixgbe_sysctl_interrupt_rate_handler(SYSCTLFN_PROTO);
static int ixgbe_sysctl_dmac(SYSCTLFN_PROTO);
static int ixgbe_sysctl_phy_temp(SYSCTLFN_PROTO);
static int ixgbe_sysctl_phy_overtemp_occurred(SYSCTLFN_PROTO);
#ifdef IXGBE_DEBUG
static int ixgbe_sysctl_power_state(SYSCTLFN_PROTO);
static int ixgbe_sysctl_print_rss_config(SYSCTLFN_PROTO);
#endif
static int ixgbe_sysctl_next_to_check_handler(SYSCTLFN_PROTO);
static int ixgbe_sysctl_next_to_refresh_handler(SYSCTLFN_PROTO);
static int ixgbe_sysctl_rdh_handler(SYSCTLFN_PROTO);
static int ixgbe_sysctl_rdt_handler(SYSCTLFN_PROTO);
static int ixgbe_sysctl_tdt_handler(SYSCTLFN_PROTO);
static int ixgbe_sysctl_tdh_handler(SYSCTLFN_PROTO);
static int ixgbe_sysctl_eee_state(SYSCTLFN_PROTO);
static int ixgbe_sysctl_debug(SYSCTLFN_PROTO);
static int ixgbe_sysctl_rx_copy_len(SYSCTLFN_PROTO);
static int ixgbe_sysctl_tx_process_limit(SYSCTLFN_PROTO);
static int ixgbe_sysctl_rx_process_limit(SYSCTLFN_PROTO);
static int ixgbe_sysctl_wol_enable(SYSCTLFN_PROTO);
static int ixgbe_sysctl_wufc(SYSCTLFN_PROTO);
/* Interrupt functions */
static int ixgbe_msix_que(void *);
static int ixgbe_msix_admin(void *);
static void ixgbe_intr_admin_common(struct ixgbe_softc *, u32, u32 *);
static int ixgbe_legacy_irq(void *);
/*
 * AIM: Adaptive Interrupt Moderation.
 * The interrupt rate is varied over time based on the
 * traffic seen on each interrupt vector.
 */
static bool ixgbe_enable_aim = true;
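/*
 * Note: NetBSD has no FreeBSD-style SYSCTL_INT()/TUNABLE_INT() macros;
 * they are stubbed out below (presumably to minimize diffs against the
 * FreeBSD driver), and the tunables are instead exposed through
 * sysctl_createv() elsewhere in this file.
 */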
#define SYSCTL_INT(_a1, _a2, _a3, _a4, _a5, _a6, _a7)
SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RDTUN, &ixgbe_enable_aim, 0,
"Enable adaptive interrupt moderation");
static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
&ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second");
/* How many packets rxeof tries to clean at a time */
static int ixgbe_rx_process_limit = 256;
SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
&ixgbe_rx_process_limit, 0, "Maximum number of received packets to process at a time, -1 means unlimited");
/* How many packets txeof tries to clean at a time */
static int ixgbe_tx_process_limit = 256;
SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
&ixgbe_tx_process_limit, 0,
"Maximum number of sent packets to process at a time, -1 means unlimited");
/* Flow control setting, default to full */
static int ixgbe_flow_control = ixgbe_fc_full;
SYSCTL_INT(_hw_ix, OID_AUTO, flow_control, CTLFLAG_RDTUN,
&ixgbe_flow_control, 0, "Default flow control used for all adapters");
/* Whether packet processing uses a workqueue or a softint */
static bool ixgbe_txrx_workqueue = false;
/*
 * Smart speed setting, default to on.
 * This only works as a compile option right now, as it's
 * applied during attach; set this to 'ixgbe_smart_speed_off'
 * to disable.
 */
static int ixgbe_smart_speed = ixgbe_smart_speed_on;
/*
* MSI-X should be the default for best performance,
* but this allows it to be forced off for testing.
*/
static int ixgbe_enable_msix = 1;
SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0,
"Enable MSI-X interrupts");
/*
 * Number of queues. If set to 0, the driver autoconfigures
 * based on the number of CPUs and the number of MSI-X vectors.
 * Set a nonzero value here to override that.
 */
static int ixgbe_num_queues = 0;
SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0,
"Number of queues to configure, 0 indicates autoconfigure");
/*
 * Number of TX descriptors per ring.
 * Set higher than RX as this seems to be the
 * better performing choice.
 */
static int ixgbe_txd = DEFAULT_TXD;
SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0,
"Number of transmit descriptors per queue");
/* Number of RX descriptors per ring */
static int ixgbe_rxd = DEFAULT_RXD;
SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0,
"Number of receive descriptors per queue");
/*
 * Enabling this allows the use of unsupported SFP+ modules.
 * Note that if you do so, you are on your own :)
 */
static int allow_unsupported_sfp = false;
#define TUNABLE_INT(__x, __y)
TUNABLE_INT("hw.ix.unsupported_sfp", &allow_unsupported_sfp);
/*
* Not sure if Flow Director is fully baked,
* so we'll default to turning it off.
*/
static int ixgbe_enable_fdir = 0;
SYSCTL_INT(_hw_ix, OID_AUTO, enable_fdir, CTLFLAG_RDTUN, &ixgbe_enable_fdir, 0,
"Enable Flow Director");
#if 0
static int (*ixgbe_start_locked)(struct ifnet *, struct tx_ring *);
static int (*ixgbe_ring_empty)(struct ifnet *, pcq_t *);
#endif
#define IXGBE_WORKQUEUE_PRI PRI_SOFTNET
/* Interval between reports of errors */
static const struct timeval ixgbe_errlog_intrvl = { 60, 0 }; /* 60s */
/************************************************************************
* ixgbe_initialize_rss_mapping
************************************************************************/
static void
ixgbe_initialize_rss_mapping(struct ixgbe_softc *sc)
{
struct ixgbe_hw *hw = &sc->hw;
u32 reta = 0, mrqc, rss_key[10];
int queue_id, table_size, index_mult;
int i, j;
u32 rss_hash_config;
/* Force the use of the default RSS key. */
#ifdef __NetBSD__
rss_getkey((uint8_t *) &rss_key);
#else
if (sc->feat_en & IXGBE_FEATURE_RSS) {
/* Fetch the configured RSS key */
rss_getkey((uint8_t *) &rss_key);
} else {
/* set up random bits */
cprng_fast(&rss_key, sizeof(rss_key));
}
#endif
/* Set multiplier for RETA setup and table size based on MAC */
index_mult = 0x1;
table_size = 128;
switch (sc->hw.mac.type) {
case ixgbe_mac_82598EB:
index_mult = 0x11;
break;
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
case ixgbe_mac_X550EM_a:
table_size = 512;
break;
default:
break;
}
/* Set up the redirection table */
for (i = 0, j = 0; i < table_size; i++, j++) {
if (j == sc->num_queues)
j = 0;
if (sc->feat_en & IXGBE_FEATURE_RSS) {
/*
 * Fetch the RSS bucket id for the given indirection
 * entry. Cap it at the number of configured buckets
 * (which is num_queues).
 */
queue_id = rss_get_indirection_to_bucket(i);
queue_id = queue_id % sc->num_queues;
} else
queue_id = (j * index_mult);
/*
* The low 8 bits are for hash value (n+0);
* The next 8 bits are for hash value (n+1), etc.
*/
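/*
 * For illustration: after the first four iterations (i = 0..3 with
 * queue ids q0..q3), reta holds (q3 << 24) | (q2 << 16) | (q1 << 8) | q0,
 * which is then written out to RETA(0) below.
 */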
reta = reta >> 8;
reta = reta | (((uint32_t) queue_id) << 24);
if ((i & 3) == 3) {
if (i < 128)
IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
else
IXGBE_WRITE_REG(hw, IXGBE_ERETA((i >> 2) - 32),
reta);
reta = 0;
}
}
/* Now fill our hash function seeds */
for (i = 0; i < 10; i++)
IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]);
/* Perform hash on these packet types */
if (sc->feat_en & IXGBE_FEATURE_RSS)
rss_hash_config = rss_gethashconfig();
else {
/*
* Disable UDP - IP fragments aren't currently being handled
* and so we end up with a mix of 2-tuple and 4-tuple
* traffic.
*/
rss_hash_config = RSS_HASHTYPE_RSS_IPV4
| RSS_HASHTYPE_RSS_TCP_IPV4
| RSS_HASHTYPE_RSS_IPV6
| RSS_HASHTYPE_RSS_TCP_IPV6
| RSS_HASHTYPE_RSS_IPV6_EX
| RSS_HASHTYPE_RSS_TCP_IPV6_EX;
}
mrqc = IXGBE_MRQC_RSSEN;
if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4)
mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4)
mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6)
mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX)
mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX)
mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4)
mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX)
mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
mrqc |= ixgbe_get_mrqc(sc->iov_mode);
IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
} /* ixgbe_initialize_rss_mapping */
for (i = 0; i < sc->num_queues; i++, rxr++) {
u64 rdba = rxr->rxdma.dma_paddr;
u32 reg;
int regnum = i / 4; /* 1 register per 4 queues */
int regshift = i % 4; /* 4 bits per 1 queue */
j = rxr->me;
/* Setup the Base and Length of the Rx Descriptor Ring */
IXGBE_WRITE_REG(hw, IXGBE_RDBAL(j),
(rdba & 0x00000000ffffffffULL));
IXGBE_WRITE_REG(hw, IXGBE_RDBAH(j), (rdba >> 32));
IXGBE_WRITE_REG(hw, IXGBE_RDLEN(j),
sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
/* Set up the SRRCTL register */
srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(j));
srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
srrctl |= bufsz;
srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
/*
* Set DROP_EN iff we have no flow control and >1 queue.
* Note that srrctl was cleared shortly before during reset,
* so we do not need to clear the bit, but do it just in case
* this code is moved elsewhere.
*/
if ((sc->num_queues > 1) &&
(sc->hw.fc.requested_mode == ixgbe_fc_none))
srrctl |= IXGBE_SRRCTL_DROP_EN;
else
srrctl &= ~IXGBE_SRRCTL_DROP_EN;
IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(j), srrctl);
/* Setup the HW Rx Head and Tail Descriptor Pointers */
IXGBE_WRITE_REG(hw, IXGBE_RDH(j), 0);
IXGBE_WRITE_REG(hw, IXGBE_RDT(j), 0);
/* Set the driver rx tail address */
rxr->tail = IXGBE_RDT(rxr->me);
}
/* Setup the Base and Length of the Tx Descriptor Ring */
for (i = 0; i < sc->num_queues; i++, txr++) {
u64 tdba = txr->txdma.dma_paddr;
u32 txctrl = 0;
u32 tqsmreg, reg;
int regnum = i / 4; /* 1 register per 4 queues */
int regshift = i % 4; /* 4 bits per 1 queue */
int j = txr->me;
/* Enable WoL (if supported) */
ixgbe_check_wol_support(sc);
/* Register for VLAN events */
ether_set_vlan_cb(&sc->osdep.ec, ixgbe_vlan_cb);
/* Verify adapter fan is still functional (if applicable) */
if (sc->feat_en & IXGBE_FEATURE_FAN_FAIL) {
u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
ixgbe_check_fan_failure(sc, esdp, FALSE);
}
/* Set an initial default flow control value */
hw->fc.requested_mode = ixgbe_flow_control;
/* Do descriptor calc and sanity checks */
if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
aprint_error_dev(dev, "Invalid TX ring size (%d). "
"It must be between %d and %d, "
"inclusive, and must be a multiple of %zu. "
"Using default value of %d instead.\n",
ixgbe_txd, MIN_TXD, MAX_TXD,
DBA_ALIGN / sizeof(union ixgbe_adv_tx_desc),
DEFAULT_TXD);
sc->num_tx_desc = DEFAULT_TXD;
} else
sc->num_tx_desc = ixgbe_txd;
if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
aprint_error_dev(dev, "Invalid RX ring size (%d). "
"It must be between %d and %d, "
"inclusive, and must be a multiple of %zu. "
"Using default value of %d instead.\n",
ixgbe_rxd, MIN_RXD, MAX_RXD,
DBA_ALIGN / sizeof(union ixgbe_adv_rx_desc),
DEFAULT_RXD);
sc->num_rx_desc = DEFAULT_RXD;
} else
sc->num_rx_desc = ixgbe_rxd;
/* Sysctls for limiting the amount of work done in the taskqueues */
sc->rx_process_limit
= (ixgbe_rx_process_limit <= sc->num_rx_desc)
? ixgbe_rx_process_limit : sc->num_rx_desc;
sc->tx_process_limit
= (ixgbe_tx_process_limit <= sc->num_tx_desc)
? ixgbe_tx_process_limit : sc->num_tx_desc;
/* Set the default upper limit for copying mbufs in rxeof */
sc->rx_copy_len = IXGBE_RX_COPY_LEN_MAX;
/* Make sure we have a good EEPROM before we read from it */
if (ixgbe_validate_eeprom_checksum(&sc->hw, NULL) < 0) {
aprint_error_dev(dev, "The EEPROM Checksum Is Not Valid\n");
error = EIO;
goto err_late;
}
aprint_normal("%s:", device_xname(dev));
/* NVM Image Version */
high = low = 0;
switch (hw->mac.type) {
case ixgbe_mac_82598EB:
/*
* Print version from the dev starter version (0x29). The
* location is the same as newer device's IXGBE_NVM_MAP_VER.
*/
hw->eeprom.ops.read(hw, IXGBE_NVM_MAP_VER, &nvmreg);
if (nvmreg == 0xffff)
break;
high = (nvmreg >> 12) & 0x0f;
low = (nvmreg >> 4) & 0xff;
id = nvmreg & 0x0f;
/*
 * The following output might not be correct. Some 82598 cards
 * have 0x1070 or 0x2090. The 82598 spec update has notes about
 * version 2.9.0.
 */
aprint_normal(" NVM Image Version %u.%u.%u,", high, low, id);
break;
case ixgbe_mac_X540:
case ixgbe_mac_X550EM_a:
hw->eeprom.ops.read(hw, IXGBE_NVM_IMAGE_VER, &nvmreg);
if (nvmreg == 0xffff)
break;
high = (nvmreg >> 12) & 0x0f;
low = (nvmreg >> 4) & 0xff;
id = nvmreg & 0x0f;
aprint_normal(" NVM Image Version %u.", high);
if (hw->mac.type == ixgbe_mac_X540)
str = "%x";
else
str = "%02x";
aprint_normal(str, low);
aprint_normal(" ID 0x%x,", id);
break;
case ixgbe_mac_X550EM_x:
case ixgbe_mac_X550:
hw->eeprom.ops.read(hw, IXGBE_NVM_IMAGE_VER, &nvmreg);
if (nvmreg == 0xffff)
break;
high = (nvmreg >> 12) & 0x0f;
low = nvmreg & 0xff;
aprint_normal(" NVM Image Version %u.%02x,", high, low);
break;
default:
break;
}
hw->eeprom.nvm_image_ver_high = high;
hw->eeprom.nvm_image_ver_low = low;
/* PHY firmware revision */
switch (hw->mac.type) {
case ixgbe_mac_X540:
case ixgbe_mac_X550:
hw->eeprom.ops.read(hw, IXGBE_PHYFW_REV, &nvmreg);
if (nvmreg == 0xffff)
break;
high = (nvmreg >> 12) & 0x0f;
low = (nvmreg >> 4) & 0xff;
id = nvmreg & 0x000f;
aprint_normal(" PHY FW Revision %u.", high);
if (hw->mac.type == ixgbe_mac_X540)
str = "%x";
else
str = "%02x";
aprint_normal(str, low);
aprint_normal(" ID 0x%x,", id);
break;
default:
break;
}
/* NVM Map version & OEM NVM Image version */
switch (hw->mac.type) {
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
case ixgbe_mac_X550EM_a:
hw->eeprom.ops.read(hw, IXGBE_NVM_MAP_VER, &nvmreg);
if (nvmreg != 0xffff) {
high = (nvmreg >> 12) & 0x0f;
low = nvmreg & 0x00ff;
aprint_normal(" NVM Map version %u.%02x,", high, low);
}
hw->eeprom.ops.read(hw, IXGBE_OEM_NVM_IMAGE_VER, &nvmreg);
if (nvmreg != 0xffff) {
high = (nvmreg >> 12) & 0x0f;
low = nvmreg & 0x00ff;
aprint_verbose(" OEM NVM Image version %u.%02x,", high,
low);
}
break;
default:
break;
}
/* Printed Board Assembly number */
error = ixgbe_read_pba_string(hw, buf, IXGBE_PBANUM_LENGTH);
aprint_normal_dev(dev, "PBA number %s\n", error ? "unknown" : buf);
/* Recovery mode */
switch (sc->hw.mac.type) {
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
case ixgbe_mac_X550EM_a:
/* >= 2.00 */
if (hw->eeprom.nvm_image_ver_high >= 2) {
sc->feat_cap |= IXGBE_FEATURE_RECOVERY_MODE;
sc->feat_en |= IXGBE_FEATURE_RECOVERY_MODE;
}
break;
default:
break;
}
if (sc->feat_en & IXGBE_FEATURE_MSIX) {
error = ixgbe_allocate_msix(sc, pa);
if (error) {
/* Free allocated queue structures first */
ixgbe_free_queues(sc);
/* Fallback to legacy interrupt */
if (sc->feat_cap & IXGBE_FEATURE_MSI)
sc->feat_en |= IXGBE_FEATURE_MSI;
sc->num_queues = 1;
/* Allocate our TX/RX Queues again */
if (ixgbe_allocate_queues(sc)) {
error = ENOMEM;
goto err_out;
}
}
}
if ((sc->feat_en & IXGBE_FEATURE_MSIX) == 0)
error = ixgbe_allocate_legacy(sc, pa);
if (error)
goto err_late;
/* Tasklets for Link, SFP, Multispeed Fiber and Flow Director */
mutex_init(&(sc)->admin_mtx, MUTEX_DEFAULT, IPL_NET);
snprintf(wqname, sizeof(wqname), "%s-admin", device_xname(dev));
error = workqueue_create(&sc->admin_wq, wqname,
ixgbe_handle_admin, sc, IXGBE_WORKQUEUE_PRI, IPL_NET, WQ_MPSAFE);
if (error) {
aprint_error_dev(dev,
"could not create admin workqueue (%d)\n", error);
goto err_out;
}
error = ixgbe_start_hw(hw);
switch (error) {
case IXGBE_ERR_EEPROM_VERSION:
aprint_error_dev(dev,
"This device is a pre-production adapter/"
"LOM. Please be aware there may be issues associated "
"with your hardware.\nIf you are experiencing problems "
"please contact your Intel or hardware representative "
"who provided you with this hardware.\n");
break;
default:
break;
}
/* Setup OS specific network interface */
if (ixgbe_setup_interface(dev, sc) != 0)
goto err_late;
/*
 * Print the PHY ID only for copper PHYs. On devices that have an
 * SFP(+) cage with a module inserted, phy.id is not an MII PHY id
 * but an SFF-8024 ID.
 */
if (hw->phy.media_type == ixgbe_media_type_copper) {
uint16_t id1, id2;
int oui, model, rev;
char descr[MII_MAX_DESCR_LEN];
/* Enable EEE power saving */
if (sc->feat_cap & IXGBE_FEATURE_EEE)
hw->mac.ops.setup_eee(hw,
sc->feat_en & IXGBE_FEATURE_EEE);
/* Enable power to the phy. */
if (!unsupported_sfp) {
/* Enable the optics for 82599 SFP+ fiber */
ixgbe_enable_tx_laser(hw);
/*
 * XXX Currently, ixgbe_set_phy_power() supports only copper
 * PHYs, so testing !unsupported_sfp is not strictly required.
 */
ixgbe_set_phy_power(hw, TRUE);
}
/************************************************************************
* ixgbe_check_wol_support
*
* Checks whether the adapter's ports are capable of
* Wake On LAN by reading the adapter's NVM.
*
* Sets each port's hw->wol_enabled value depending
* on the value read here.
************************************************************************/
static void
ixgbe_check_wol_support(struct ixgbe_softc *sc)
{
struct ixgbe_hw *hw = &sc->hw;
u16 dev_caps = 0;
/* Find out WoL support for port */
sc->wol_support = hw->wol_enabled = 0;
ixgbe_get_device_caps(hw, &dev_caps);
if ((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0_1) ||
((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0) &&
hw->bus.func == 0))
sc->wol_support = hw->wol_enabled = 1;
/* Save initial wake up filter configuration */
sc->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC);
return;
} /* ixgbe_check_wol_support */
/************************************************************************
* ixgbe_setup_interface
*
* Setup networking device structure and register an interface.
************************************************************************/
static int
ixgbe_setup_interface(device_t dev, struct ixgbe_softc *sc)
{
struct ethercom *ec = &sc->osdep.ec;
struct ifnet *ifp;
/*
 * Don't turn this on by default: if VLANs are created on another
 * pseudo device (e.g. lagg), VLAN events are not passed through,
 * breaking operation, whereas with HW FILTER off it works. If you
 * use VLANs directly on the ixgbe driver, you can enable this and
 * get full hardware tag filtering.
 */
ec->ec_capabilities |= ETHERCAP_VLAN_HWFILTER;
/*
* Specify the media types supported by this adapter and register
* callbacks to update media and link information
*/
ec->ec_ifmedia = &sc->media;
ifmedia_init_with_lock(&sc->media, IFM_IMASK, ixgbe_media_change,
ixgbe_media_status, &sc->core_mtx);
if (hw->mac.ops.check_link)
err = ixgbe_check_link(hw, &sc->link_speed,
&sc->link_up, FALSE);
if (err)
return;
/*
 * If this is the first call (no advertised speeds configured
 * yet), fetch the value to use for auto negotiation.
 */
autoneg = hw->phy.autoneg_advertised;
if ((IFM_SUBTYPE(ifm->ifm_cur->ifm_media) != IFM_NONE)
&& ((!autoneg) && (hw->mac.ops.get_link_capabilities)))
err = hw->mac.ops.get_link_capabilities(hw, &autoneg,
&negotiate);
if (err)
return;
if (hw->mac.ops.setup_link)
err = hw->mac.ops.setup_link(hw, autoneg,
sc->link_up);
}
} /* ixgbe_config_link */
/*
* Fill out the OS statistics structure. Only RX errors are required
* here because all TX counters are incremented in the TX path and
* normal RX counters are prepared in ether_input().
*/
net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
if_statadd_ref(ifp, nsr, if_iqdrops, total_missed_rx + total_qprdc);
for (i = 0; i < IXGBE_TC_COUNTER_NUM; i++) {
if (i < __arraycount(stats->mpc)) {
IXGBE_EVC_STORE(&stats->mpc[i], 0);
if (hw->mac.type == ixgbe_mac_82598EB)
IXGBE_EVC_STORE(&stats->rnbc[i], 0);
}
if (i < __arraycount(stats->pxontxc)) {
IXGBE_EVC_STORE(&stats->pxontxc[i], 0);
IXGBE_EVC_STORE(&stats->pxonrxc[i], 0);
IXGBE_EVC_STORE(&stats->pxofftxc[i], 0);
IXGBE_EVC_STORE(&stats->pxoffrxc[i], 0);
if (hw->mac.type >= ixgbe_mac_82599EB)
IXGBE_EVC_STORE(&stats->pxon2offc[i], 0);
}
}
/*
 * Control VLAN HW tagging when ec_nvlans changes from 1 to 0
 * or from 0 to 1.
 */
if ((set && (ec->ec_nvlans == 1)) || (!set && (ec->ec_nvlans == 0)))
ixgbe_setup_vlan_hw_tagging(sc);
return rv;
}
/************************************************************************
 * ixgbe_register_vlan
 *
 * Run via the vlan config EVENT. It enables us to use the
 * HW filter table since we can get the VLAN id. This just
 * creates the entry in the soft version of the VFTA; init
 * will repopulate the real table.
 ************************************************************************/
static int
ixgbe_register_vlan(struct ixgbe_softc *sc, u16 vtag)
{
u16 index, bit;
int error;
/*
* Part 2:
* Setup VLAN HW filter
*/
/* Cleanup shadow_vfta */
for (i = 0; i < IXGBE_VFTA_SIZE; i++)
sc->shadow_vfta[i] = 0;
/* Generate shadow_vfta from ec_vids */
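/* For example, VLAN id 100 sets bit 4 (100 % 32) of shadow_vfta[3] (100 / 32). */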
ETHER_LOCK(ec);
SIMPLEQ_FOREACH(vlanidp, &ec->ec_vids, vid_list) {
uint32_t idx;
idx = vlanidp->vid / 32;
KASSERT(idx < IXGBE_VFTA_SIZE);
sc->shadow_vfta[idx] |= (u32)1 << (vlanidp->vid % 32);
}
ETHER_UNLOCK(ec);
for (i = 0; i < IXGBE_VFTA_SIZE; i++)
IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), sc->shadow_vfta[i]);
ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
/* Enable the filter table if HW filtering is enabled */
if (ec->ec_capenable & ETHERCAP_VLAN_HWFILTER)
ctrl |= IXGBE_VLNCTRL_VFE;
else
ctrl &= ~IXGBE_VLNCTRL_VFE;
IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
} /* ixgbe_setup_vlan_hw_support */
/************************************************************************
* ixgbe_get_slot_info
*
* Get the width and transaction speed of
* the slot this adapter is plugged into.
************************************************************************/
static void
ixgbe_get_slot_info(struct ixgbe_softc *sc)
{
device_t dev = sc->dev;
struct ixgbe_hw *hw = &sc->hw;
u32 offset;
u16 link;
int bus_info_valid = TRUE;
/* Some devices are behind an internal bridge */
switch (hw->device_id) {
case IXGBE_DEV_ID_82599_SFP_SF_QP:
case IXGBE_DEV_ID_82599_QSFP_SF_QP:
goto get_parent_info;
default:
break;
}
ixgbe_get_bus_info(hw);
/*
 * Some devices don't use PCI-E; for those there is no need
 * to display "Unknown" for bus speed and width.
 */
switch (hw->mac.type) {
case ixgbe_mac_X550EM_x:
case ixgbe_mac_X550EM_a:
return;
default:
goto display;
}
get_parent_info:
/*
* For the Quad port adapter we need to parse back
* up the PCI tree to find the speed of the expansion
* slot into which this adapter is plugged. A bit more work.
*/
dev = device_parent(device_parent(dev));
#if 0
#ifdef IXGBE_DEBUG
device_printf(dev, "parent pcib = %x,%x,%x\n", pci_get_bus(dev),
pci_get_slot(dev), pci_get_function(dev));
#endif
dev = device_parent(device_parent(dev));
#ifdef IXGBE_DEBUG
device_printf(dev, "slot pcib = %x,%x,%x\n", pci_get_bus(dev),
pci_get_slot(dev), pci_get_function(dev));
#endif
#endif
/* Now get the PCI Express Capabilities offset */
if (pci_get_capability(sc->osdep.pc, sc->osdep.tag,
PCI_CAP_PCIEXPRESS, &offset, NULL)) {
/*
* Hmm...can't get PCI-Express capabilities.
* Falling back to default method.
*/
bus_info_valid = FALSE;
ixgbe_get_bus_info(hw);
goto display;
}
/* ...and read the Link Status Register */
link = pci_conf_read(sc->osdep.pc, sc->osdep.tag,
offset + PCIE_LCSR) >> 16;
ixgbe_set_pci_config_data_generic(hw, link);
if (bus_info_valid) {
if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) &&
((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
(hw->bus.speed == ixgbe_bus_speed_2500))) {
device_printf(dev, "PCI-Express bandwidth available"
" for this card\n is not sufficient for"
" optimal performance.\n");
device_printf(dev, "For optimal performance a x8 "
"PCIE, or x4 PCIE Gen2 slot is required.\n");
}
if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) &&
((hw->bus.width <= ixgbe_bus_width_pcie_x8) &&
(hw->bus.speed < ixgbe_bus_speed_8000))) {
device_printf(dev, "PCI-Express bandwidth available"
" for this card\n is not sufficient for"
" optimal performance.\n");
device_printf(dev, "For optimal performance a x8 "
"PCIE Gen3 slot is required.\n");
}
} else
device_printf(dev,
"Unable to determine slot speed/width. The speed/width "
"reported are those of the internal switch.\n");
if (que->txrx_use_workqueue) {
/*
 * sc->que_wq is bound to each CPU instead of each NIC queue
 * to reduce the number of workqueue kthreads. Since interrupt
 * affinity should be considered in this function, the workqueue
 * kthread must be WQ_PERCPU. If a WQ_PERCPU workqueue kthread
 * were created per NIC queue, the number of kthreads would be
 * (number of used NIC queues) * (number of CPUs), which is
 * (number of CPUs) ^ 2 most often.
 *
 * Re-entry on the same NIC queue is prevented by masking the
 * queue's interrupt, and different NIC queues use different
 * struct work instances (que->wq_cookie), so an "enqueued"
 * flag guarding against calling workqueue_enqueue() twice is
 * not required.
 */
workqueue_enqueue(sc->que_wq, &que->wq_cookie, curcpu());
} else
softint_schedule(que->que_si);
}
/*
* Don't change "que->txrx_use_workqueue" from this point to avoid
* flip-flopping softint/workqueue mode in one deferred processing.
*/
que->txrx_use_workqueue = sc->txrx_use_workqueue;
if (sc->enable_aim == false)
goto no_calc;
/*
* Do Adaptive Interrupt Moderation:
* - Write out last calculated setting
* - Calculate based on average size over
* the last interval.
*/
if (que->eitr_setting)
ixgbe_eitr_write(sc, que->msix, que->eitr_setting);
que->eitr_setting = 0;
/* Idle, do nothing */
if ((txr->bytes == 0) && (rxr->bytes == 0))
goto no_calc;
if ((txr->bytes) && (txr->packets))
newitr = txr->bytes/txr->packets;
if ((rxr->bytes) && (rxr->packets))
newitr = uimax(newitr, (rxr->bytes / rxr->packets));
newitr += 24; /* account for hardware frame, crc */
/* set an upper boundary */
newitr = uimin(newitr, 3000);
/* Be nice to the mid range */
if ((newitr > 300) && (newitr < 1200))
newitr = (newitr / 3);
else
newitr = (newitr / 2);
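/*
 * Worked example: with an average frame size of 1500 bytes,
 * newitr = 1500 + 24 = 1524; that is under the 3000 cap and
 * outside the 300..1200 mid range, so it is halved to 762.
 */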
/*
 * When RSC is used, the ITR interval must be larger than RSC_DELAY.
 * Currently we use 2us for RSC_DELAY. The minimum value is always
 * greater than 2us on 100M (and perhaps 10M; it's not documented),
 * but not on 1G and higher.
 */
if ((sc->link_speed != IXGBE_LINK_SPEED_100_FULL)
&& (sc->link_speed != IXGBE_LINK_SPEED_10_FULL))
if (newitr < IXGBE_MIN_RSC_EITR_10G1G)
newitr = IXGBE_MIN_RSC_EITR_10G1G;
/* save for next interrupt */
que->eitr_setting = newitr;
/************************************************************************
* ixgbe_media_status - Media Ioctl callback
*
* Called whenever the user queries the status of
* the interface using ifconfig.
************************************************************************/
static void
ixgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
struct ixgbe_softc *sc = ifp->if_softc;
struct ixgbe_hw *hw = &sc->hw;
int layer;
/* Display current flow control setting used on link */
if (hw->fc.current_mode == ixgbe_fc_rx_pause ||
hw->fc.current_mode == ixgbe_fc_full)
ifmr->ifm_active |= IFM_ETH_RXPAUSE;
if (hw->fc.current_mode == ixgbe_fc_tx_pause ||
hw->fc.current_mode == ixgbe_fc_full)
ifmr->ifm_active |= IFM_ETH_TXPAUSE;
return;
} /* ixgbe_media_status */
/************************************************************************
* ixgbe_media_change - Media Ioctl callback
*
 * Called when the user changes speed/duplex using the
 * media/mediaopt option with ifconfig.
************************************************************************/
static int
ixgbe_media_change(struct ifnet *ifp)
{
struct ixgbe_softc *sc = ifp->if_softc;
struct ifmedia *ifm = &sc->media;
struct ixgbe_hw *hw = &sc->hw;
ixgbe_link_speed speed = 0;
ixgbe_link_speed link_caps = 0;
bool negotiate = false;
s32 err = IXGBE_NOT_IMPLEMENTED;
INIT_DEBUGOUT("ixgbe_media_change: begin");
if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
return (EINVAL);
if (hw->phy.media_type == ixgbe_media_type_backplane)
return (EPERM);
/*
* We don't actually need to check against the supported
* media types of the adapter; ifmedia will take care of
* that for us.
*/
switch (IFM_SUBTYPE(ifm->ifm_media)) {
case IFM_AUTO:
err = hw->mac.ops.get_link_capabilities(hw, &link_caps,
&negotiate);
if (err != IXGBE_SUCCESS) {
device_printf(sc->dev, "Unable to determine "
"supported advertise speeds\n");
return (ENODEV);
}
speed |= link_caps;
break;
case IFM_10G_T:
case IFM_10G_LRM:
case IFM_10G_LR:
case IFM_10G_TWINAX:
case IFM_10G_SR:
case IFM_10G_CX4:
case IFM_10G_KR:
case IFM_10G_KX4:
speed |= IXGBE_LINK_SPEED_10GB_FULL;
break;
case IFM_5000_T:
speed |= IXGBE_LINK_SPEED_5GB_FULL;
break;
case IFM_2500_T:
case IFM_2500_KX:
speed |= IXGBE_LINK_SPEED_2_5GB_FULL;
break;
case IFM_1000_T:
case IFM_1000_LX:
case IFM_1000_SX:
case IFM_1000_KX:
speed |= IXGBE_LINK_SPEED_1GB_FULL;
break;
case IFM_100_TX:
speed |= IXGBE_LINK_SPEED_100_FULL;
break;
case IFM_10_T:
speed |= IXGBE_LINK_SPEED_10_FULL;
break;
case IFM_NONE:
break;
default:
goto invalid;
}
/*
 * First get the cause.
 *
 * The specifications of the 82598, 82599, X540 and X550 say the
 * EICS register is write only. However, Linux reads EICS instead
 * of EICR to get the interrupt cause, as a workaround for silicon
 * errata. At least, reading EICR clears the lower 16 bits of EIMS
 * on the 82598.
 */
eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
/* Be sure the queue bits are not cleared */
eicr &= ~IXGBE_EICR_RTX_QUEUE;
/* Clear all OTHER interrupts with write */
IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr);
ixgbe_intr_admin_common(sc, eicr, &eims_disable);
/* Re-enable some OTHER interrupts */
IXGBE_WRITE_REG(hw, IXGBE_EIMS, eims_orig & ~eims_disable);
/*
 * An interrupt might not arrive when a module is inserted.
 * When a link status change interrupt occurs and the driver
 * still regards the SFP as unplugged, issue the module softint
 * and then issue the LSC interrupt.
 */
if ((eicr & eicr_mask)
|| ((hw->phy.sfp_type == ixgbe_sfp_type_not_present)
&& (eicr & IXGBE_EICR_LSC))) {
task_requests |= IXGBE_REQUEST_TASK_MOD;
*eims_disable |= IXGBE_EIMS_LSC;
}
/*
 * If each "que->txrx_use_workqueue" were changed in the sysctl
 * handler, it would cause flip-flopping between softint and
 * workqueue mode within one deferred processing. In that case,
 * preempt_disable()/preempt_enable() would be required in
 * ixgbe_sched_handle_que() to avoid
 * KASSERT(kpreempt_disabled()) in softint_schedule().
 * Changing "que->txrx_use_workqueue" in the interrupt handler
 * is lighter than doing preempt_disable()/preempt_enable() in
 * every ixgbe_sched_handle_que().
 */
sc->txrx_use_workqueue = ixgbe_txrx_workqueue;
if (sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_READWRITE,
CTLTYPE_BOOL, "txrx_workqueue",
SYSCTL_DESCR("Use workqueue for packet processing"),
NULL, 0, &sc->txrx_use_workqueue, 0, CTL_CREATE,
CTL_EOL) != 0)
aprint_error_dev(dev, "could not create sysctl\n");
#ifdef IXGBE_DEBUG
/* testing sysctls (for all devices) */
if (sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_READWRITE,
CTLTYPE_INT, "power_state", SYSCTL_DESCR("PCI Power State"),
ixgbe_sysctl_power_state, 0, (void *)sc, 0, CTL_CREATE,
CTL_EOL) != 0)
aprint_error_dev(dev, "could not create sysctl\n");
if (sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_READONLY,
CTLTYPE_STRING, "print_rss_config",
SYSCTL_DESCR("Prints RSS Configuration"),
ixgbe_sysctl_print_rss_config, 0, (void *)sc, 0, CTL_CREATE,
CTL_EOL) != 0)
aprint_error_dev(dev, "could not create sysctl\n");
#endif
/* for X550 series devices */
if (hw->mac.type >= ixgbe_mac_X550)
if (sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_READWRITE,
CTLTYPE_INT, "dmac", SYSCTL_DESCR("DMA Coalesce"),
ixgbe_sysctl_dmac, 0, (void *)sc, 0, CTL_CREATE,
CTL_EOL) != 0)
aprint_error_dev(dev, "could not create sysctl\n");
/* for WoL-capable devices */
if (sc->wol_support) {
if (sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_READWRITE,
CTLTYPE_BOOL, "wol_enable",
SYSCTL_DESCR("Enable/Disable Wake on LAN"),
ixgbe_sysctl_wol_enable, 0, (void *)sc, 0, CTL_CREATE,
CTL_EOL) != 0)
aprint_error_dev(dev, "could not create sysctl\n");
if (sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_READWRITE,
CTLTYPE_INT, "wufc",
SYSCTL_DESCR("Enable/Disable Wake Up Filters"),
ixgbe_sysctl_wufc, 0, (void *)sc, 0, CTL_CREATE,
CTL_EOL) != 0)
aprint_error_dev(dev, "could not create sysctl\n");
}
/* for X552/X557-AT devices */
if ((hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) ||
(hw->device_id == IXGBE_DEV_ID_X550EM_A_10G_T)) {
const struct sysctlnode *phy_node;
for (i = 0; i < sc->num_queues; i++, que++, txr++) {
if (!(sc->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
if (txr->txr_si != NULL)
softint_disestablish(txr->txr_si);
}
if (que->que_si != NULL)
softint_disestablish(que->que_si);
}
if (sc->txr_wq != NULL)
workqueue_destroy(sc->txr_wq);
if (sc->txr_wq_enqueued != NULL)
percpu_free(sc->txr_wq_enqueued, sizeof(u_int));
if (sc->que_wq != NULL)
workqueue_destroy(sc->que_wq);
if (sc->admin_wq != NULL) {
workqueue_destroy(sc->admin_wq);
sc->admin_wq = NULL;
}
if (sc->timer_wq != NULL) {
workqueue_destroy(sc->timer_wq);
sc->timer_wq = NULL;
}
if (sc->recovery_mode_timer_wq != NULL) {
/*
 * ixgbe_ifstop() doesn't call workqueue_wait() for
 * the recovery_mode_timer workqueue, so call it here.
 */
workqueue_wait(sc->recovery_mode_timer_wq,
&sc->recovery_mode_timer_wc);
atomic_store_relaxed(&sc->recovery_mode_timer_pending, 0);
workqueue_destroy(sc->recovery_mode_timer_wq);
sc->recovery_mode_timer_wq = NULL;
}
} /* ixgbe_free_deferred_handlers */
/************************************************************************
* ixgbe_detach - Device removal routine
*
* Called when the driver is being removed.
* Stops the adapter and deallocates all the resources
* that were allocated for driver operation.
*
* return 0 on success, positive on failure
************************************************************************/
static int
ixgbe_detach(device_t dev, int flags)
{
struct ixgbe_softc *sc = device_private(dev);
struct rx_ring *rxr = sc->rx_rings;
struct tx_ring *txr = sc->tx_rings;
struct ixgbe_hw *hw = &sc->hw;
struct ixgbe_hw_stats *stats = &sc->stats.pf;
u32 ctrl_ext;
int i;
INIT_DEBUGOUT("ixgbe_detach: begin");
if (sc->osdep.attached == false)
return 0;
if (ixgbe_pci_iov_detach(dev) != 0) {
device_printf(dev, "SR-IOV in use; detach first.\n");
return (EBUSY);
}
if (VLAN_ATTACHED(&sc->osdep.ec) &&
(flags & (DETACH_SHUTDOWN | DETACH_FORCE)) == 0) {
aprint_error_dev(dev, "VLANs in use, detach first\n");
return (EBUSY);
}
ether_ifdetach(sc->ifp);
sc->osdep.detaching = true;
/*
* Stop the interface. ixgbe_setup_low_power_mode() calls
* ixgbe_ifstop(), so it's not required to call ixgbe_ifstop()
* directly.
*/
ixgbe_setup_low_power_mode(sc);
callout_halt(&sc->timer, NULL);
if (sc->feat_en & IXGBE_FEATURE_RECOVERY_MODE)
callout_halt(&sc->recovery_mode_timer, NULL);
/* let hardware know driver is unloading */
ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
if (sc->feat_en & IXGBE_FEATURE_NETMAP)
netmap_detach(sc->ifp);
ixgbe_free_pci_resources(sc);
#if 0 /* XXX the NetBSD port is probably missing something here */
bus_generic_detach(dev);
#endif
if_detach(sc->ifp);
ifmedia_fini(&sc->media);
if_percpuq_destroy(sc->ipq);
for (i = 0; i < IXGBE_TC_COUNTER_NUM; i++) {
if (i < __arraycount(stats->mpc)) {
evcnt_detach(&stats->mpc[i]);
if (hw->mac.type == ixgbe_mac_82598EB)
evcnt_detach(&stats->rnbc[i]);
}
if (i < __arraycount(stats->pxontxc)) {
evcnt_detach(&stats->pxontxc[i]);
evcnt_detach(&stats->pxonrxc[i]);
evcnt_detach(&stats->pxofftxc[i]);
evcnt_detach(&stats->pxoffrxc[i]);
if (hw->mac.type >= ixgbe_mac_82599EB)
evcnt_detach(&stats->pxon2offc[i]);
}
}
txr = sc->tx_rings;
for (i = 0; i < sc->num_queues; i++, rxr++, txr++) {
evcnt_detach(&sc->queues[i].irqs);
evcnt_detach(&sc->queues[i].handleq);
evcnt_detach(&sc->queues[i].req);
evcnt_detach(&txr->total_packets);
#ifndef IXGBE_LEGACY_TX
evcnt_detach(&txr->pcq_drops);
#endif
evcnt_detach(&txr->no_desc_avail);
evcnt_detach(&txr->tso_tx);
if (i < __arraycount(stats->qprc)) {
evcnt_detach(&stats->qprc[i]);
evcnt_detach(&stats->qptc[i]);
evcnt_detach(&stats->qbrc[i]);
evcnt_detach(&stats->qbtc[i]);
if (hw->mac.type >= ixgbe_mac_82599EB)
evcnt_detach(&stats->qprdc[i]);
}
/************************************************************************
* ixgbe_setup_low_power_mode - LPLU/WoL preparation
*
* Prepare the adapter/port for LPLU and/or WoL
************************************************************************/
static int
ixgbe_setup_low_power_mode(struct ixgbe_softc *sc)
{
struct ixgbe_hw *hw = &sc->hw;
device_t dev = sc->dev;
struct ifnet *ifp = sc->ifp;
s32 error = 0;
/* Limit power management flow to X550EM baseT */
if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
hw->phy.ops.enter_lplu) {
/* X550EM baseT adapters need a special LPLU flow */
hw->phy.reset_disable = true;
ixgbe_ifstop(ifp, 1);
error = hw->phy.ops.enter_lplu(hw);
if (error)
device_printf(dev,
"Error entering LPLU: %d\n", error);
hw->phy.reset_disable = false;
} else {
/* Just stop for other adapters */
ixgbe_ifstop(ifp, 1);
}
IXGBE_CORE_LOCK(sc);
if (!hw->wol_enabled) {
ixgbe_set_phy_power(hw, FALSE);
IXGBE_WRITE_REG(hw, IXGBE_WUFC, 0);
IXGBE_WRITE_REG(hw, IXGBE_WUC, 0);
} else {
/* Turn off support for APM wakeup. (Using ACPI instead) */
IXGBE_WRITE_REG(hw, IXGBE_GRC_BY_MAC(hw),
IXGBE_READ_REG(hw, IXGBE_GRC_BY_MAC(hw)) & ~(u32)2);
/*
* Clear Wake Up Status register to prevent any previous wakeup
* events from waking us up immediately after we suspend.
*/
IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff);
/*
* Program the Wakeup Filter Control register with user filter
* settings
*/
IXGBE_WRITE_REG(hw, IXGBE_WUFC, sc->wufc);
/* Enable wakeups and power management in Wakeup Control */
IXGBE_WRITE_REG(hw, IXGBE_WUC,
IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN);
}
IXGBE_CORE_UNLOCK(sc);
return error;
} /* ixgbe_setup_low_power_mode */
/************************************************************************
* ixgbe_shutdown - Shutdown entry point
************************************************************************/
#if 0 /* XXX NetBSD ought to register something like this through pmf(9) */
static int
ixgbe_shutdown(device_t dev)
{
struct ixgbe_softc *sc = device_private(dev);
int error = 0;
INIT_DEBUGOUT("ixgbe_shutdown: begin");
error = ixgbe_setup_low_power_mode(sc);
return (error);
} /* ixgbe_shutdown */
#endif
/************************************************************************
* ixgbe_suspend
*
* From D0 to D3
************************************************************************/
static bool
ixgbe_suspend(device_t dev, const pmf_qual_t *qual)
{
struct ixgbe_softc *sc = device_private(dev);
int error = 0;
/* Read & clear WUS register */
wus = IXGBE_READ_REG(hw, IXGBE_WUS);
if (wus)
device_printf(dev, "Woken up by (WUS): %#010x\n",
IXGBE_READ_REG(hw, IXGBE_WUS));
IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff);
/* And clear WUFC until next low-power transition */
IXGBE_WRITE_REG(hw, IXGBE_WUFC, 0);
/*
* Required after D3->D0 transition;
* will re-advertise all previous advertised speeds
*/
if (ifp->if_flags & IFF_UP)
ixgbe_init_locked(sc);
IXGBE_CORE_UNLOCK(sc);
return true;
} /* ixgbe_resume */
/*
* Set the various hardware offload abilities.
*
* This takes the ifnet's if_capenable flags (e.g. set by the user using
* ifconfig) and indicates to the OS via the ifnet's if_hwassist field what
* mbuf offload flags the driver will understand.
*/
static void
ixgbe_set_if_hwassist(struct ixgbe_softc *sc)
{
/* XXX */
}
/************************************************************************
 * ixgbe_init_locked - Init entry point
 *
 * Used in two ways: It is used by the stack as an init
 * entry point in the network interface structure. It is also
 * used by the driver as a hw/sw initialization routine to
 * get to a consistent state.
 ************************************************************************/
static void
ixgbe_init_locked(struct ixgbe_softc *sc)
{
struct ifnet *ifp = sc->ifp;
device_t dev = sc->dev;
struct ixgbe_hw *hw = &sc->hw;
struct ix_queue *que;
struct tx_ring *txr;
struct rx_ring *rxr;
u32 txdctl, mhadd;
u32 rxdctl, rxctrl;
u32 ctrl_ext;
bool unsupported_sfp = false;
int i, j, error;
/* XXX check IFF_UP and IFF_RUNNING, power-saving state! */
hw->need_unsupported_sfp_recovery = false;
hw->adapter_stopped = FALSE;
ixgbe_stop_adapter(hw);
callout_stop(&sc->timer);
if (sc->feat_en & IXGBE_FEATURE_RECOVERY_MODE)
callout_stop(&sc->recovery_mode_timer);
for (i = 0, que = sc->queues; i < sc->num_queues; i++, que++)
que->disabled_count = 0;
/* XXX I moved this here from the SIOCSIFMTU case in ixgbe_ioctl(). */
sc->max_frame_size =
ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
/* Queue indices may change with IOV mode */
ixgbe_align_all_queue_indices(sc);
/* reprogram the RAR[0] in case user changed it. */
ixgbe_set_rar(hw, 0, hw->mac.addr, sc->pool, IXGBE_RAH_AV);
/* Get the latest mac address, User can use a LAA */
memcpy(hw->mac.addr, CLLADDR(ifp->if_sadl),
IXGBE_ETH_LENGTH_OF_ADDRESS);
ixgbe_set_rar(hw, 0, hw->mac.addr, sc->pool, 1);
hw->addr_ctrl.rar_used_count = 1;
/* Set hardware offload abilities from ifnet flags */
ixgbe_set_if_hwassist(sc);
/* Prepare transmit descriptors and buffers */
if (ixgbe_setup_transmit_structures(sc)) {
device_printf(dev, "Could not setup transmit structures\n");
ixgbe_stop_locked(sc);
return;
}
/* Enable SDP & MSI-X interrupts based on adapter */
ixgbe_config_gpie(sc);
/* Set MTU size */
if (ifp->if_mtu > ETHERMTU) {
/* aka IXGBE_MAXFRS on 82599 and newer */
mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
mhadd &= ~IXGBE_MHADD_MFS_MASK;
mhadd |= sc->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
}
/* Now enable all the queues */
for (i = 0; i < sc->num_queues; i++) {
txr = &sc->tx_rings[i];
txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txr->me));
txdctl |= IXGBE_TXDCTL_ENABLE;
/* Set WTHRESH to 8, burst writeback */
txdctl &= ~IXGBE_TXDCTL_WTHRESH_MASK;
txdctl |= IXGBE_TX_WTHRESH << IXGBE_TXDCTL_WTHRESH_SHIFT;
/*
* When the internal queue falls below PTHRESH (32),
* start prefetching as long as there are at least
* HTHRESH (1) buffers ready. The values are taken
* from the Intel linux driver 3.8.21.
* Prefetching enables tx line rate even with 1 queue.
*/
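/*
 * (PTHRESH occupies TXDCTL bits 6:0 and HTHRESH bits 14:8;
 * WTHRESH, set above, is bits 22:16.)
 */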
txdctl |= (32 << 0) | (1 << 8);
IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txr->me), txdctl);
}
/*
* In netmap mode, we must preserve the buffers made
* available to userspace before the if_init()
* (this is true by default on the TX side, because
* init makes all buffers available to userspace).
*
* netmap_reset() and the device specific routines
* (e.g. ixgbe_setup_receive_rings()) map these
* buffers at the end of the NIC ring, so here we
* must set the RDT (tail) register to make sure
* they are not overwritten.
*
* In this driver the NIC ring starts at RDH = 0,
* RDT points to the last slot available for reception (?),
* so RDT = num_rx_desc - 1 means the whole ring is available.
*/
#ifdef DEV_NETMAP
if ((sc->feat_en & IXGBE_FEATURE_NETMAP) &&
(ifp->if_capenable & IFCAP_NETMAP)) {
struct netmap_adapter *na = NA(sc->ifp);
struct netmap_kring *kring = na->rx_rings[i];
int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);
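/*
 * e.g. with 1024 RX descriptors and no slots in use by userspace
 * (nm_kr_rxspace(kring) == 0), t = 1023 and the whole ring is
 * available for reception.
 */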
/* Set up MSI/MSI-X routing */
if (sc->feat_en & IXGBE_FEATURE_MSIX) {
ixgbe_configure_ivars(sc);
/* Set up auto-mask */
if (hw->mac.type == ixgbe_mac_82598EB)
IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
else {
IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
}
} else { /* Simple settings for Legacy/MSI */
ixgbe_set_ivar(sc, 0, 0, 0);
ixgbe_set_ivar(sc, 0, 0, 1);
IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
}
ixgbe_init_fdir(sc);
/*
* Check on any SFP devices that
* need to be kick-started
*/
if (hw->phy.type == ixgbe_phy_none) {
error = hw->phy.ops.identify(hw);
if (error == IXGBE_ERR_SFP_NOT_SUPPORTED)
unsupported_sfp = true;
} else if (hw->phy.type == ixgbe_phy_sfp_unsupported)
unsupported_sfp = true;
if (unsupported_sfp)
device_printf(dev,
"Unsupported SFP+ module type was detected.\n");
/* Set moderation on the Link interrupt */
ixgbe_eitr_write(sc, sc->vector, IXGBE_LINK_ITR);
/* Enable EEE power saving */
if (sc->feat_cap & IXGBE_FEATURE_EEE)
hw->mac.ops.setup_eee(hw,
sc->feat_en & IXGBE_FEATURE_EEE);
/* Enable power to the phy. */
if (!unsupported_sfp) {
ixgbe_set_phy_power(hw, TRUE);
/* Config/Enable Link */
ixgbe_config_link(sc);
}
/* Hardware Packet Buffer & Flow Control setup */
ixgbe_config_delay_values(sc);
/* Initialize the FC settings */
ixgbe_start_hw(hw);
/* Set up VLAN support and filter */
ixgbe_setup_vlan_hw_support(sc);
/* Setup DMA Coalescing */
ixgbe_config_dmac(sc);
/* OK to schedule workqueues. */
sc->schedule_wqs_ok = true;
/* Enable the use of the MBX by the VFs */
if (sc->feat_en & IXGBE_FEATURE_SRIOV) {
ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD;
IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
}
/************************************************************************
* ixgbe_set_ivar
*
* Setup the correct IVAR register for a particular MSI-X interrupt
* (yes this is all very magic and confusing :)
* - entry is the register array entry
* - vector is the MSI-X vector for this queue
* - type is RX/TX/MISC
************************************************************************/
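/*
 * Example (82599 and newer): for the RX side (type 0) of queue
 * entry 5, index = 16 * (5 & 1) + 8 * 0 = 16, so the vector byte
 * lands in bits 23:16 of IVAR(5 >> 1) = IVAR(2).
 */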
static void
ixgbe_set_ivar(struct ixgbe_softc *sc, u8 entry, u8 vector, s8 type)
{
struct ixgbe_hw *hw = &sc->hw;
u32 ivar, index;
vector |= IXGBE_IVAR_ALLOC_VAL;
switch (hw->mac.type) {
case ixgbe_mac_82598EB:
if (type == -1)
entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
else
entry += (type * 64);
index = (entry >> 2) & 0x1F;
ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
ivar &= ~(0xffUL << (8 * (entry & 0x3)));
ivar |= ((u32)vector << (8 * (entry & 0x3)));
IXGBE_WRITE_REG(&sc->hw, IXGBE_IVAR(index), ivar);
break;
case ixgbe_mac_82599EB:
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
case ixgbe_mac_X550EM_a:
if (type == -1) { /* MISC IVAR */
index = (entry & 1) * 8;
ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
ivar &= ~(0xffUL << index);
ivar |= ((u32)vector << index);
IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
} else { /* RX/TX IVARS */
index = (16 * (entry & 1)) + (8 * type);
ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
ivar &= ~(0xffUL << index);
ivar |= ((u32)vector << index);
IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
}
break;
default:
break;
}
} /* ixgbe_set_ivar */
for (int i = 0; i < sc->num_queues; i++, que++) {
struct rx_ring *rxr = &sc->rx_rings[i];
struct tx_ring *txr = &sc->tx_rings[i];
/* First the RX queue entry */
ixgbe_set_ivar(sc, rxr->me, que->msix, 0);
/* ... and the TX */
ixgbe_set_ivar(sc, txr->me, que->msix, 1);
/* Set an Initial EITR value */
ixgbe_eitr_write(sc, que->msix, newitr);
/*
 * Eliminate the influence of the previous state.
 * At this point the Tx/Rx interrupt handler
 * (ixgbe_msix_que()) cannot be called, so neither
 * IXGBE_TX_LOCK nor IXGBE_RX_LOCK is required.
 */
que->eitr_setting = 0;
}
/* For the Link interrupt */
ixgbe_set_ivar(sc, 1, sc->vector, -1);
} /* ixgbe_configure_ivars */
/* Calculate High Water */
switch (hw->mac.type) {
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
case ixgbe_mac_X550EM_a:
tmp = IXGBE_DV_X540(frame, frame);
break;
default:
tmp = IXGBE_DV(frame, frame);
break;
}
size = IXGBE_BT2KB(tmp);
rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
hw->fc.high_water[0] = rxpb - size;
/* Now calculate Low Water */
switch (hw->mac.type) {
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
case ixgbe_mac_X550EM_a:
tmp = IXGBE_LOW_DV_X540(frame);
break;
default:
tmp = IXGBE_LOW_DV(frame);
break;
}
hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
/************************************************************************
* ixgbe_mc_array_itr
*
* An iterator function needed by the multicast shared code.
* It feeds the shared code routine the addresses in the
* array of ixgbe_set_rxfilter() one by one.
************************************************************************/
static u8 *
ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
{
struct ixgbe_mc_addr *mta;
mta = (struct ixgbe_mc_addr *)*update_ptr;
*vmdq = mta->vmdq;
*update_ptr = (u8*)(mta + 1);
return (mta->addr);
} /* ixgbe_mc_array_itr */
/************************************************************************
* ixgbe_local_timer - Timer routine
*
* Checks for link status, updates statistics,
* and runs the watchdog check.
************************************************************************/
static void
ixgbe_local_timer(void *arg)
{
struct ixgbe_softc *sc = arg;
if (sc->schedule_wqs_ok) {
if (atomic_cas_uint(&sc->timer_pending, 0, 1) == 0)
workqueue_enqueue(sc->timer_wq,
&sc->timer_wc, NULL);
}
}
/* Check for pluggable optics */
if (ixgbe_is_sfp(hw)) {
bool sched_mod_task = false;
if (hw->mac.type == ixgbe_mac_82598EB) {
/*
* On 82598EB, SFP+'s MOD_ABS pin is not connected to
* any GPIO(SDP). So just schedule TASK_MOD.
*/
sched_mod_task = true;
} else {
bool was_full, is_full;
/*
* Check the TX queues status
* - mark hung queues so we don't schedule on them
* - watchdog only if all queues show hung
*/
que = sc->queues;
for (i = 0; i < sc->num_queues; i++, que++) {
/* Keep track of queues with work for soft irq */
if (que->txr->busy)
queues |= 1ULL << que->me;
/*
 * Each time txeof runs without cleaning while there
 * are uncleaned descriptors, it increments busy. If
 * busy reaches the MAX, we declare the queue hung.
 */
if (que->busy == IXGBE_QUEUE_HUNG) {
++hung;
/* Mark the queue as inactive */
sc->active_queues &= ~(1ULL << que->me);
continue;
} else {
/* Check if we've come back from hung */
if ((sc->active_queues & (1ULL << que->me)) == 0)
sc->active_queues |= 1ULL << que->me;
}
if (que->busy >= IXGBE_MAX_TX_BUSY) {
device_printf(dev,
"Warning queue %d appears to be hung!\n", i);
que->txr->busy = IXGBE_QUEUE_HUNG;
++hung;
}
}
/* Only truly watchdog if all queues show hung */
if (hung == sc->num_queues)
goto watchdog;
#if 0 /* XXX Avoid unexpectedly disabling interrupt forever (PR#53294) */
else if (queues != 0) { /* Force an IRQ on queues with work */
que = sc->queues;
for (i = 0; i < sc->num_queues; i++, que++) {
mutex_enter(&que->dc_mtx);
if (que->disabled_count == 0)
ixgbe_rearm_queues(sc,
queues & ((u64)1 << i));
mutex_exit(&que->dc_mtx);
}
}
#endif
/************************************************************************
 * ixgbe_handle_mod - Tasklet for SFP module interrupts
 *   bool int_en: true if called while the interrupt is enabled.
 ************************************************************************/
static void
ixgbe_handle_mod(void *context, bool int_en)
{
struct ixgbe_softc *sc = context;
struct ixgbe_hw *hw = &sc->hw;
device_t dev = sc->dev;
enum ixgbe_sfp_type last_sfp_type;
u32 err;
bool last_unsupported_sfp_recovery;
KASSERT(mutex_owned(&sc->core_mtx));
last_sfp_type = hw->phy.sfp_type;
last_unsupported_sfp_recovery = hw->need_unsupported_sfp_recovery;
IXGBE_EVC_ADD(&sc->mod_workev, 1);
if (sc->hw.need_crosstalk_fix) {
if ((hw->mac.type != ixgbe_mac_82598EB) &&
!ixgbe_sfp_cage_full(hw))
goto out;
}
err = hw->phy.ops.identify_sfp(hw);
if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
if (last_unsupported_sfp_recovery == false)
device_printf(dev,
"Unsupported SFP+ module type was detected.\n");
goto out;
}
if (hw->need_unsupported_sfp_recovery) {
device_printf(dev, "Recovering from unsupported SFP\n");
/*
 * We could recover the status by calling setup_sfp(),
 * setup_link() and some others. That is complex and might not
 * work correctly in some unknown cases. To avoid that kind of
 * problem, call ixgbe_init_locked(). It's a simple and safe
 * approach.
 */
ixgbe_init_locked(sc);
} else if ((hw->phy.sfp_type != ixgbe_sfp_type_not_present) &&
(hw->phy.sfp_type != last_sfp_type)) {
/* A module was inserted or changed. */
out:
/* get_supported_phy_layer will call hw->phy.ops.identify_sfp() */
sc->phy_layer = ixgbe_get_supported_physical_layer(hw);
/* Adjust media types shown in ifconfig */
IXGBE_CORE_UNLOCK(sc);
ifmedia_removeall(&sc->media);
ixgbe_add_media_types(sc);
ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
IXGBE_CORE_LOCK(sc);
/*
* Don't schedule MSF event if the chip is 82598. 82598 doesn't support
* MSF. At least, calling ixgbe_handle_msf on 82598 DA makes the link
* flap because the function calls setup_link().
*/
if (hw->mac.type != ixgbe_mac_82598EB) {
mutex_enter(&sc->admin_mtx);
if (int_en)
sc->task_requests |= IXGBE_REQUEST_TASK_MSF;
else
sc->task_requests |= IXGBE_REQUEST_TASK_MSF_WOI;
mutex_exit(&sc->admin_mtx);
}
/*
* Don't call ixgbe_schedule_admin_tasklet() because we are on
* the workqueue now.
*/
} /* ixgbe_handle_mod */
/************************************************************************
* ixgbe_stop_locked - Stop the hardware
*
* Disables all traffic on the adapter by issuing a
* global reset on the MAC and deallocates TX/RX buffers.
************************************************************************/
static void
ixgbe_stop_locked(void *arg)
{
struct ifnet *ifp;
struct ixgbe_softc *sc = arg;
struct ixgbe_hw *hw = &sc->hw;
ifp = sc->ifp;
/* Let the stack know... */
ifp->if_flags &= ~IFF_RUNNING;
ixgbe_reset_hw(hw);
hw->adapter_stopped = FALSE;
ixgbe_stop_adapter(hw);
if (hw->mac.type == ixgbe_mac_82599EB)
ixgbe_stop_mac_link_on_d3_82599(hw);
/* Turn off the laser - noop with no optics */
ixgbe_disable_tx_laser(hw);
/* Update the stack */
sc->link_up = FALSE;
ixgbe_update_link_status(sc);
/* reprogram the RAR[0] in case user changed it. */
ixgbe_set_rar(&sc->hw, 0, sc->hw.mac.addr, 0, IXGBE_RAH_AV);
return;
} /* ixgbe_stop_locked */
/************************************************************************
* ixgbe_update_link_status - Update OS on link state
*
* Note: Only updates the OS on the cached link state.
* The real check of the hardware only happens with
* a link interrupt.
************************************************************************/
static void
ixgbe_update_link_status(struct ixgbe_softc *sc)
{
struct ifnet *ifp = sc->ifp;
device_t dev = sc->dev;
struct ixgbe_hw *hw = &sc->hw;
KASSERT(mutex_owned(&sc->core_mtx));
if (sc->link_up) {
if (sc->link_active != LINK_STATE_UP) {
/*
 * Eliminate the influence of the previous state,
 * in the same way as ixgbe_init_locked() does.
 */
struct ix_queue *que = sc->queues;
for (int i = 0; i < sc->num_queues; i++, que++)
que->eitr_setting = 0;
if (sc->link_speed == IXGBE_LINK_SPEED_10GB_FULL) {
/*
 * Discard the counts of both MAC Local Fault and
 * Remote Fault, because those registers are valid
 * only while the link is up at 10Gbps.
 */
IXGBE_READ_REG(hw, IXGBE_MLFC);
IXGBE_READ_REG(hw, IXGBE_MRFC);
}
if (bootverbose) {
const char *bpsmsg;
switch (sc->link_speed) {
case IXGBE_LINK_SPEED_10GB_FULL:
bpsmsg = "10 Gbps";
break;
case IXGBE_LINK_SPEED_5GB_FULL:
bpsmsg = "5 Gbps";
break;
case IXGBE_LINK_SPEED_2_5GB_FULL:
bpsmsg = "2.5 Gbps";
break;
case IXGBE_LINK_SPEED_1GB_FULL:
bpsmsg = "1 Gbps";
break;
case IXGBE_LINK_SPEED_100_FULL:
bpsmsg = "100 Mbps";
break;
case IXGBE_LINK_SPEED_10_FULL:
bpsmsg = "10 Mbps";
break;
default:
bpsmsg = "unknown speed";
break;
}
device_printf(dev, "Link is up %s %s \n",
bpsmsg, "Full Duplex");
}
sc->link_active = LINK_STATE_UP;
/* Update any Flow Control changes */
ixgbe_fc_enable(&sc->hw);
/* Update DMA coalescing config */
ixgbe_config_dmac(sc);
if_link_state_change(ifp, LINK_STATE_UP);
if (sc->feat_en & IXGBE_FEATURE_SRIOV)
ixgbe_ping_all_vfs(sc);
}
} else {
/*
* Do it when link active changes to DOWN. i.e.
* a) LINK_STATE_UNKNOWN -> LINK_STATE_DOWN
* b) LINK_STATE_UP -> LINK_STATE_DOWN
*/
if (sc->link_active != LINK_STATE_DOWN) {
if (bootverbose)
device_printf(dev, "Link is Down\n");
if_link_state_change(ifp, LINK_STATE_DOWN);
sc->link_active = LINK_STATE_DOWN;
if (sc->feat_en & IXGBE_FEATURE_SRIOV)
ixgbe_ping_all_vfs(sc);
ixgbe_drain_all(sc);
}
}
} /* ixgbe_update_link_status */
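/*
 * The bootverbose switch above maps IXGBE_LINK_SPEED_* values to
 * strings; a helper would avoid repeating that mapping elsewhere.
 * Hypothetical sketch, not part of the stock driver:
 */
#ifdef notyet
static const char *
ixgbe_speed_str(ixgbe_link_speed speed)
{
switch (speed) {
case IXGBE_LINK_SPEED_10GB_FULL: return "10 Gbps";
case IXGBE_LINK_SPEED_5GB_FULL: return "5 Gbps";
case IXGBE_LINK_SPEED_2_5GB_FULL: return "2.5 Gbps";
case IXGBE_LINK_SPEED_1GB_FULL: return "1 Gbps";
case IXGBE_LINK_SPEED_100_FULL: return "100 Mbps";
case IXGBE_LINK_SPEED_10_FULL: return "10 Mbps";
default: return "unknown speed";
}
}
#endif /* notyet */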
/************************************************************************
 * ixgbe_enable_intr - Enable various interrupt causes
 ************************************************************************/
static void
ixgbe_enable_intr(struct ixgbe_softc *sc)
{
struct ixgbe_hw *hw = &sc->hw;
struct ix_queue *que = sc->queues;
u32 mask, fwsm;
mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
switch (sc->hw.mac.type) {
case ixgbe_mac_82599EB:
mask |= IXGBE_EIMS_ECC;
/* Temperature sensor on some adapters */
mask |= IXGBE_EIMS_GPI_SDP0;
/* SFP+ (RX_LOS_N & MOD_ABS_N) */
mask |= IXGBE_EIMS_GPI_SDP1;
mask |= IXGBE_EIMS_GPI_SDP2;
break;
case ixgbe_mac_X540:
/* Detect if Thermal Sensor is enabled */
fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
if (fwsm & IXGBE_FWSM_TS_ENABLED)
mask |= IXGBE_EIMS_TS;
mask |= IXGBE_EIMS_ECC;
break;
case ixgbe_mac_X550:
/* MAC thermal sensor is automatically enabled */
mask |= IXGBE_EIMS_TS;
mask |= IXGBE_EIMS_ECC;
break;
case ixgbe_mac_X550EM_x:
case ixgbe_mac_X550EM_a:
/* Some devices use SDP0 for important information */
if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP ||
hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP_N ||
hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw);
if (hw->phy.type == ixgbe_phy_x550em_ext_t)
mask |= IXGBE_EICR_GPI_SDP0_X540;
mask |= IXGBE_EIMS_ECC;
break;
default:
break;
}
/* Enable Fan Failure detection */
if (sc->feat_en & IXGBE_FEATURE_FAN_FAIL)
mask |= IXGBE_EIMS_GPI_SDP1;
/* Enable SR-IOV */
if (sc->feat_en & IXGBE_FEATURE_SRIOV)
mask |= IXGBE_EIMS_MAILBOX;
/* Enable Flow Director */
if (sc->feat_en & IXGBE_FEATURE_FDIR)
mask |= IXGBE_EIMS_FLOW_DIR;
IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
/* With MSI-X we use auto clear */
if ((sc->feat_en & IXGBE_FEATURE_MSIX) != 0) {
/*
 * Use auto clear only for RTX_QUEUE interrupts; don't auto-clear
 * the others (e.g. the link interrupt). Note that the TCP_TIMER
 * interrupt isn't used at all.
 */
IXGBE_WRITE_REG(hw, IXGBE_EIAC, IXGBE_EIMS_RTX_QUEUE);
}
/*
* Now enable all queues, this is done separately to
* allow for handling the extended (beyond 32) MSI-X
* vectors that can be used by 82599
*/
for (int i = 0; i < sc->num_queues; i++, que++)
ixgbe_enable_queue(sc, que->msix);
} /* ixgbe_enable_intr */
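/*
 * Rough shape of the per-queue enable used above (simplified sketch;
 * the in-tree ixgbe_enable_queue() also maintains a nested-disable
 * count under a per-queue mutex). Vectors beyond bit 31 are reached
 * through the extended EIMS_EX registers, which is why the queue loop
 * cannot simply build a single 32-bit EIMS mask:
 */
#ifdef notyet
static void
ixgbe_enable_queue_sketch(struct ixgbe_softc *sc, u32 vector)
{
struct ixgbe_hw *hw = &sc->hw;
u64 queue = 1ULL << vector;
u32 mask;
if (hw->mac.type == ixgbe_mac_82598EB) {
mask = (IXGBE_EIMS_RTX_QUEUE & queue);
IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
} else {
mask = (queue & 0xFFFFFFFF);
if (mask)
IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
mask = (queue >> 32);
if (mask)
IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
}
}
#endif /* notyet */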
/************************************************************************
* ixgbe_set_flowcntl - Set flow control
*
* Flow control values:
* 0 - off
* 1 - rx pause
* 2 - tx pause
* 3 - full
************************************************************************/
static int
ixgbe_set_flowcntl(struct ixgbe_softc *sc, int fc)
{
switch (fc) {
case ixgbe_fc_rx_pause:
case ixgbe_fc_tx_pause:
case ixgbe_fc_full:
sc->hw.fc.requested_mode = fc;
if (sc->num_queues > 1)
ixgbe_disable_rx_drop(sc);
break;
case ixgbe_fc_none:
sc->hw.fc.requested_mode = ixgbe_fc_none;
if (sc->num_queues > 1)
ixgbe_enable_rx_drop(sc);
break;
default:
return (EINVAL);
}
#if 0 /* XXX NetBSD */
/* Don't autoneg if forcing a value */
sc->hw.fc.disable_fc_autoneg = TRUE;
#endif
ixgbe_fc_enable(&sc->hw);
return (0);
} /* ixgbe_set_flowcntl */
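/*
 * Sketch of the sysctl glue that typically drives ixgbe_set_flowcntl():
 * read the current mode into the node, let userland overwrite it, then
 * apply. Hypothetical and simplified for illustration; the in-tree
 * handler may differ in detail:
 */
#ifdef notyet
static int
ixgbe_sysctl_flowcntl_sketch(SYSCTLFN_ARGS)
{
struct sysctlnode node = *rnode;
struct ixgbe_softc *sc = (struct ixgbe_softc *)node.sysctl_data;
int error, fc;
fc = sc->hw.fc.current_mode;
node.sysctl_data = &fc;
error = sysctl_lookup(SYSCTLFN_CALL(&node));
if ((error) || (newp == NULL))
return (error);
return ixgbe_set_flowcntl(sc, fc);
}
#endif /* notyet */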
/************************************************************************
* ixgbe_enable_rx_drop
*
* Enable the hardware to drop packets when the buffer is
* full. This is useful with multiqueue, so that no single
* queue being full stalls the entire RX engine. We only
* enable this when Multiqueue is enabled AND Flow Control
* is disabled.
************************************************************************/
static void
ixgbe_enable_rx_drop(struct ixgbe_softc *sc)
{
struct ixgbe_hw *hw = &sc->hw;
struct rx_ring *rxr;
u32 srrctl;
for (int i = 0; i < sc->num_queues; i++) {
rxr = &sc->rx_rings[i];
srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me));
srrctl |= IXGBE_SRRCTL_DROP_EN;
IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl);
}
/* enable drop for each vf */
for (int i = 0; i < sc->num_vfs; i++) {
IXGBE_WRITE_REG(hw, IXGBE_QDE,
(IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT) |
IXGBE_QDE_ENABLE));
}
} /* ixgbe_enable_rx_drop */
/************************************************************************
 * ixgbe_disable_rx_drop
 *
 * Disable the hardware's drop-on-full behaviour; used when flow
 * control is requested, since dropped frames would defeat it.
 ************************************************************************/
static void
ixgbe_disable_rx_drop(struct ixgbe_softc *sc)
{
struct ixgbe_hw *hw = &sc->hw;
struct rx_ring *rxr;
u32 srrctl;
for (int i = 0; i < sc->num_queues; i++) {
rxr = &sc->rx_rings[i];
srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me));
srrctl &= ~IXGBE_SRRCTL_DROP_EN;
IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl);
}
/* disable drop for each vf */
for (int i = 0; i < sc->num_vfs; i++) {
IXGBE_WRITE_REG(hw, IXGBE_QDE,
(IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT)));
}
} /* ixgbe_disable_rx_drop */
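/*
 * ixgbe_enable_rx_drop() and ixgbe_disable_rx_drop() differ only in
 * the sense of one SRRCTL bit and the QDE enable flag; a consolidated
 * helper could look like this (hypothetical sketch):
 */
#ifdef notyet
static void
ixgbe_set_rx_drop(struct ixgbe_softc *sc, bool enable)
{
struct ixgbe_hw *hw = &sc->hw;
u32 srrctl, qde;
for (int i = 0; i < sc->num_queues; i++) {
struct rx_ring *rxr = &sc->rx_rings[i];
srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me));
if (enable)
srrctl |= IXGBE_SRRCTL_DROP_EN;
else
srrctl &= ~IXGBE_SRRCTL_DROP_EN;
IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl);
}
/* Mirror the setting into each VF's queue-drop-enable slot */
qde = IXGBE_QDE_WRITE | (enable ? IXGBE_QDE_ENABLE : 0);
for (int i = 0; i < sc->num_vfs; i++)
IXGBE_WRITE_REG(hw, IXGBE_QDE, qde | (i << IXGBE_QDE_IDX_SHIFT));
}
#endif /* notyet */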
/************************************************************************
 * ixgbe_set_advertise - Control advertised link speed
 *
 * Flags:
 *   0x00 - Default (all capable link speeds)
 *   0x1  - advertise 100 Mb
 *   0x2  - advertise 1 Gb
 *   0x4  - advertise 10 Gb
 *   0x8  - advertise 10 Mb
 *   0x10 - advertise 2.5 Gb
 *   0x20 - advertise 5 Gb
 ************************************************************************/
static int
ixgbe_set_advertise(struct ixgbe_softc *sc, int advertise)
{
device_t dev;
struct ixgbe_hw *hw;
ixgbe_link_speed speed = 0;
ixgbe_link_speed link_caps = 0;
s32 err = IXGBE_SUCCESS;
bool negotiate = FALSE;
/* Checks to validate new value */
if (sc->advertise == advertise) /* no change */
return (0);
dev = sc->dev;
hw = &sc->hw;
/* No speed changes for backplane media */
if (hw->phy.media_type == ixgbe_media_type_backplane)
return (ENODEV);
if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
(hw->phy.multispeed_fiber))) {
device_printf(dev,
"Advertised speed can only be set on copper or "
"multispeed fiber media types.\n");
return (EINVAL);
}
if (advertise < 0x0 || advertise > 0x3f) {
device_printf(dev, "Invalid advertised speed; "
"valid modes are 0x0 through 0x3f\n");
return (EINVAL);
}
if (hw->mac.ops.get_link_capabilities) {
err = hw->mac.ops.get_link_capabilities(hw, &link_caps,
&negotiate);
if (err != IXGBE_SUCCESS) {
device_printf(dev, "Unable to determine supported "
"advertise speeds\n");
return (ENODEV);
}
}
/* Set new value and report new advertised mode */
if (advertise & 0x1) {
if (!(link_caps & IXGBE_LINK_SPEED_100_FULL)) {
device_printf(dev, "Interface does not support 100Mb "
"advertised speed\n");
return (EINVAL);
}
speed |= IXGBE_LINK_SPEED_100_FULL;
}
if (advertise & 0x2) {
if (!(link_caps & IXGBE_LINK_SPEED_1GB_FULL)) {
device_printf(dev, "Interface does not support 1Gb "
"advertised speed\n");
return (EINVAL);
}
speed |= IXGBE_LINK_SPEED_1GB_FULL;
}
if (advertise & 0x4) {
if (!(link_caps & IXGBE_LINK_SPEED_10GB_FULL)) {
device_printf(dev, "Interface does not support 10Gb "
"advertised speed\n");
return (EINVAL);
}
speed |= IXGBE_LINK_SPEED_10GB_FULL;
}
if (advertise & 0x8) {
if (!(link_caps & IXGBE_LINK_SPEED_10_FULL)) {
device_printf(dev, "Interface does not support 10Mb "
"advertised speed\n");
return (EINVAL);
}
speed |= IXGBE_LINK_SPEED_10_FULL;
}
if (advertise & 0x10) {
if (!(link_caps & IXGBE_LINK_SPEED_2_5GB_FULL)) {
device_printf(dev, "Interface does not support 2.5Gb "
"advertised speed\n");
return (EINVAL);
}
speed |= IXGBE_LINK_SPEED_2_5GB_FULL;
}
if (advertise & 0x20) {
if (!(link_caps & IXGBE_LINK_SPEED_5GB_FULL)) {
device_printf(dev, "Interface does not support 5Gb "
"advertised speed\n");
return (EINVAL);
}
speed |= IXGBE_LINK_SPEED_5GB_FULL;
}
if (advertise == 0)
speed = link_caps; /* All capable link speeds */
hw->mac.autotry_restart = TRUE;
hw->mac.ops.setup_link(hw, speed, TRUE);
sc->advertise = advertise;
return (0);
} /* ixgbe_set_advertise */
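/*
 * The per-bit capability checks above all follow one pattern; the
 * mapping could be expressed as a table instead (hypothetical sketch):
 */
#ifdef notyet
static const struct {
int bit; /* advertise flag */
ixgbe_link_speed speed;
const char *name;
} ixgbe_adv_map[] = {
{ 0x1, IXGBE_LINK_SPEED_100_FULL, "100Mb" },
{ 0x2, IXGBE_LINK_SPEED_1GB_FULL, "1Gb" },
{ 0x4, IXGBE_LINK_SPEED_10GB_FULL, "10Gb" },
{ 0x8, IXGBE_LINK_SPEED_10_FULL, "10Mb" },
{ 0x10, IXGBE_LINK_SPEED_2_5GB_FULL, "2.5Gb" },
{ 0x20, IXGBE_LINK_SPEED_5GB_FULL, "5Gb" },
};
#endif /* notyet */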
/************************************************************************
 * ixgbe_sysctl_dmac - Manage DMA coalescing
 *
 * Control values:
 *   0/1 - off / on (on uses the default timer value of 1000)
 *
 * Legal timer values are:
 *   50, 100, 250, 500, 1000, 2000, 5000, 10000
 ************************************************************************/
static int
ixgbe_sysctl_dmac(SYSCTLFN_ARGS)
{
struct sysctlnode node = *rnode;
struct ixgbe_softc *sc = (struct ixgbe_softc *)node.sysctl_data;
struct ifnet *ifp = sc->ifp;
int error;
int newval;
newval = sc->dmac;
node.sysctl_data = &newval;
error = sysctl_lookup(SYSCTLFN_CALL(&node));
if ((error) || (newp == NULL))
return (error);
switch (newval) {
case 0:
/* Disabled */
sc->dmac = 0;
break;
case 1:
/* Enable and use default */
sc->dmac = 1000;
break;
case 50:
case 100:
case 250:
case 500:
case 1000:
case 2000:
case 5000:
case 10000:
/* Legal values - allow */
sc->dmac = newval;
break;
default:
/* Illegal value - return an error */
return (EINVAL);
}
/* Re-initialize hardware if it's already running */
if (ifp->if_flags & IFF_RUNNING)
if_init(ifp);
return (0);
} /* ixgbe_sysctl_dmac */
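/*
 * The legal DMA-coalescing timer values accepted by the switch above
 * could equally be validated against a table (hypothetical sketch;
 * note the special value 1 is mapped to the 1000 default before this
 * check would apply):
 */
#ifdef notyet
static bool
ixgbe_dmac_value_ok(int v)
{
static const int legal[] =
{ 0, 50, 100, 250, 500, 1000, 2000, 5000, 10000 };
for (size_t i = 0; i < __arraycount(legal); i++)
if (v == legal[i])
return true;
return false;
}
#endif /* notyet */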
#ifdef IXGBE_DEBUG
/************************************************************************
* ixgbe_sysctl_power_state
*
* Sysctl to test power states
* Values:
* 0 - set device to D0
* 3 - set device to D3
* (none) - get current device power state
************************************************************************/
static int
ixgbe_sysctl_power_state(SYSCTLFN_ARGS)
{
#ifdef notyet
struct sysctlnode node = *rnode;
struct ixgbe_softc *sc = (struct ixgbe_softc *)node.sysctl_data;
device_t dev = sc->dev;
int curr_ps, new_ps, error = 0;
if (ixgbe_fw_recovery_mode_swflag(sc))
return (EPERM);
/* Remainder of this experimental handler is not yet implemented */
#endif /* notyet */
return (0);
} /* ixgbe_sysctl_power_state */
#endif /* IXGBE_DEBUG */
/************************************************************************
* ixgbe_sysctl_wol_enable
*
* Sysctl to enable/disable the WoL capability,
* if supported by the adapter.
*
* Values:
* 0 - disabled
* 1 - enabled
************************************************************************/
static int
ixgbe_sysctl_wol_enable(SYSCTLFN_ARGS)
{
struct sysctlnode node = *rnode;
struct ixgbe_softc *sc = (struct ixgbe_softc *)node.sysctl_data;
struct ixgbe_hw *hw = &sc->hw;
bool new_wol_enabled;
int error = 0;
/*
* It's not required to check recovery mode because this function never
* touches hardware.
*/
new_wol_enabled = hw->wol_enabled;
node.sysctl_data = &new_wol_enabled;
error = sysctl_lookup(SYSCTLFN_CALL(&node));
if ((error) || (newp == NULL))
return (error);
if (new_wol_enabled == hw->wol_enabled)
return (0);
if (new_wol_enabled && !sc->wol_support)
return (ENODEV);
else
hw->wol_enabled = new_wol_enabled;
return (0);
} /* ixgbe_sysctl_wol_enable */
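/*
 * Usage sketch from userland (the exact node path depends on how the
 * sysctl tree is attached; "hw.ixg0.wol_enable" is an assumption):
 *
 *	sysctl -w hw.ixg0.wol_enable=1
 */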
/************************************************************************
* ixgbe_sysctl_wufc - Wake Up Filter Control
*
* Sysctl to set the types of received packets
* that will wake up the adapter.
* Flags:
* 0x1 - Link Status Change
* 0x2 - Magic Packet
* 0x4 - Direct Exact
* 0x8 - Directed Multicast
* 0x10 - Broadcast
* 0x20 - ARP/IPv4 Request Packet
* 0x40 - Direct IPv4 Packet
* 0x80 - Direct IPv6 Packet
*
* Settings not listed above will cause the sysctl to return an error.
************************************************************************/
static int
ixgbe_sysctl_wufc(SYSCTLFN_ARGS)
{
struct sysctlnode node = *rnode;
struct ixgbe_softc *sc = (struct ixgbe_softc *)node.sysctl_data;
int error = 0;
u32 new_wufc;
/*
* It's not required to check recovery mode because this function never
* touches hardware.
*/
new_wufc = sc->wufc;
node.sysctl_data = &new_wufc;
error = sysctl_lookup(SYSCTLFN_CALL(&node));
if ((error) || (newp == NULL))
return (error);
if (new_wufc == sc->wufc)
return (0);
if (new_wufc & 0xffffff00)
return (EINVAL);
new_wufc &= 0xff;
new_wufc |= (0xffffff & sc->wufc);
sc->wufc = new_wufc;
return (0);
} /* ixgbe_sysctl_wufc */
#ifdef IXGBE_DEBUG
/************************************************************************
 * ixgbe_sysctl_print_rss_config
 ************************************************************************/
static int
ixgbe_sysctl_print_rss_config(SYSCTLFN_ARGS)
{
struct sysctlnode node = *rnode;
struct ixgbe_softc *sc = (struct ixgbe_softc *)node.sysctl_data;
struct ixgbe_hw *hw = &sc->hw;
device_t dev = sc->dev;
struct sbuf *buf;
int error = 0, reta_size;
u32 reg;
if (ixgbe_fw_recovery_mode_swflag(sc))
return (EPERM);
buf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
if (!buf) {
device_printf(dev, "Could not allocate sbuf for output.\n");
return (ENOMEM);
}
// TODO: use sbufs to make a string to print out
/* Set multiplier for RETA setup and table size based on MAC */
switch (sc->hw.mac.type) {
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
case ixgbe_mac_X550EM_a:
reta_size = 128;
break;
default:
reta_size = 32;
break;
}
/* Print out the redirection table */
sbuf_cat(buf, "\n");
for (int i = 0; i < reta_size; i++) {
if (i < 32) {
reg = IXGBE_READ_REG(hw, IXGBE_RETA(i));
sbuf_printf(buf, "RETA(%2d): 0x%08x\n", i, reg);
} else {
reg = IXGBE_READ_REG(hw, IXGBE_ERETA(i - 32));
sbuf_printf(buf, "ERETA(%2d): 0x%08x\n", i - 32, reg);
}
}
error = sbuf_finish(buf);
if (error)
device_printf(dev, "Error finishing sbuf: %d\n", error);
sbuf_delete(buf);
return (0);
} /* ixgbe_sysctl_print_rss_config */
#endif /* IXGBE_DEBUG */
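/*
 * RETA indexing note: entries 0..31 live in RETA(n); the X550 family's
 * entries 32..127 live in ERETA(n - 32). A hypothetical read helper
 * capturing the split used above:
 */
#ifdef notyet
static u32
ixgbe_read_reta_entry(struct ixgbe_hw *hw, int i)
{
if (i < 32)
return IXGBE_READ_REG(hw, IXGBE_RETA(i));
return IXGBE_READ_REG(hw, IXGBE_ERETA(i - 32));
}
#endif /* notyet */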
/************************************************************************
* ixgbe_sysctl_phy_temp - Retrieve temperature of PHY
*
* For X552/X557-AT devices using an external PHY
************************************************************************/
static int
ixgbe_sysctl_phy_temp(SYSCTLFN_ARGS)
{
struct sysctlnode node = *rnode;
struct ixgbe_softc *sc = (struct ixgbe_softc *)node.sysctl_data;
struct ixgbe_hw *hw = &sc->hw;
int val;
u16 reg;
int error;
if (ixgbe_fw_recovery_mode_swflag(sc))
return (EPERM);
if ((hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) &&
(hw->device_id != IXGBE_DEV_ID_X550EM_A_10G_T)) {
device_printf(sc->dev,
"Device has no supported external thermal sensor.\n");
return (ENODEV);
}
if (hw->phy.ops.read_reg(hw, IXGBE_PHY_CURRENT_TEMP,
IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &reg)) {
device_printf(sc->dev,
"Error reading from PHY's current temperature register\n");
return (EAGAIN);
}
/* Shift temp for output */
val = reg >> 8;
node.sysctl_data = &val;
error = sysctl_lookup(SYSCTLFN_CALL(&node));
if ((error) || (newp == NULL))
return (error);
return (0);
} /* ixgbe_sysctl_phy_temp */
/************************************************************************
* ixgbe_sysctl_phy_overtemp_occurred
*
* Reports (directly from the PHY) whether the current PHY
* temperature is over the overtemp threshold.
************************************************************************/
static int
ixgbe_sysctl_phy_overtemp_occurred(SYSCTLFN_ARGS)
{
struct sysctlnode node = *rnode;
struct ixgbe_softc *sc = (struct ixgbe_softc *)node.sysctl_data;
struct ixgbe_hw *hw = &sc->hw;
int val, error;
u16 reg;
if (ixgbe_fw_recovery_mode_swflag(sc))
return (EPERM);
if ((hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) &&
(hw->device_id != IXGBE_DEV_ID_X550EM_A_10G_T)) {
device_printf(sc->dev,
"Device has no supported external thermal sensor.\n");
return (ENODEV);
}
if (hw->phy.ops.read_reg(hw, IXGBE_PHY_OVERTEMP_STATUS,
IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &reg)) {
device_printf(sc->dev,
"Error reading from PHY's temperature status register\n");
return (EAGAIN);
}
/* Get occurrence bit */
val = !!(reg & 0x4000);
node.sysctl_data = &val;
error = sysctl_lookup(SYSCTLFN_CALL(&node));
if ((error) || (newp == NULL))
return (error);
return (0);
} /* ixgbe_sysctl_phy_overtemp_occurred */
/************************************************************************
* ixgbe_print_debug_info
*
* Called only when the debug-stats display is enabled.
* Provides a way to take a look at important statistics
* maintained by the driver and hardware.
************************************************************************/
static void
ixgbe_print_debug_info(struct ixgbe_softc *sc)
{
device_t dev = sc->dev;
struct ixgbe_hw *hw = &sc->hw;
int table_size;
int i;
switch (sc->hw.mac.type) {
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
case ixgbe_mac_X550EM_a:
table_size = 128;
break;
default:
table_size = 32;
break;
}
device_printf(dev, "[E]RETA:\n");
for (i = 0; i < table_size; i++) {
if (i < 32)
printf("%02x: %08x\n", i, IXGBE_READ_REG(hw,
IXGBE_RETA(i)));
else
printf("%02x: %08x\n", i, IXGBE_READ_REG(hw,
IXGBE_ERETA(i - 32)));
}
} /* ixgbe_print_debug_info */
/************************************************************************
 * ixgbe_init_device_features
 ************************************************************************/
static void
ixgbe_init_device_features(struct ixgbe_softc *sc)
{
sc->feat_cap = IXGBE_FEATURE_NETMAP
| IXGBE_FEATURE_RSS
| IXGBE_FEATURE_MSI
| IXGBE_FEATURE_MSIX
| IXGBE_FEATURE_LEGACY_IRQ
| IXGBE_FEATURE_LEGACY_TX;
/* Set capabilities first... */
switch (sc->hw.mac.type) {
case ixgbe_mac_82598EB:
if (sc->hw.device_id == IXGBE_DEV_ID_82598AT)
sc->feat_cap |= IXGBE_FEATURE_FAN_FAIL;
break;
case ixgbe_mac_X540:
sc->feat_cap |= IXGBE_FEATURE_SRIOV;
sc->feat_cap |= IXGBE_FEATURE_FDIR;
if ((sc->hw.device_id == IXGBE_DEV_ID_X540_BYPASS) &&
(sc->hw.bus.func == 0))
sc->feat_cap |= IXGBE_FEATURE_BYPASS;
break;
case ixgbe_mac_X550:
/*
* IXGBE_FEATURE_RECOVERY_MODE will be set after reading
* NVM Image version.
*/
sc->feat_cap |= IXGBE_FEATURE_TEMP_SENSOR;
sc->feat_cap |= IXGBE_FEATURE_SRIOV;
sc->feat_cap |= IXGBE_FEATURE_FDIR;
break;
case ixgbe_mac_X550EM_x:
/*
* IXGBE_FEATURE_RECOVERY_MODE will be set after reading
* NVM Image version.
*/
sc->feat_cap |= IXGBE_FEATURE_SRIOV;
sc->feat_cap |= IXGBE_FEATURE_FDIR;
break;
case ixgbe_mac_X550EM_a:
/*
* IXGBE_FEATURE_RECOVERY_MODE will be set after reading
* NVM Image version.
*/
sc->feat_cap |= IXGBE_FEATURE_SRIOV;
sc->feat_cap |= IXGBE_FEATURE_FDIR;
sc->feat_cap &= ~IXGBE_FEATURE_LEGACY_IRQ;
if ((sc->hw.device_id == IXGBE_DEV_ID_X550EM_A_1G_T) ||
(sc->hw.device_id == IXGBE_DEV_ID_X550EM_A_1G_T_L)) {
sc->feat_cap |= IXGBE_FEATURE_TEMP_SENSOR;
sc->feat_cap |= IXGBE_FEATURE_EEE;
}
break;
case ixgbe_mac_82599EB:
sc->feat_cap |= IXGBE_FEATURE_SRIOV;
sc->feat_cap |= IXGBE_FEATURE_FDIR;
if ((sc->hw.device_id == IXGBE_DEV_ID_82599_BYPASS) &&
(sc->hw.bus.func == 0))
sc->feat_cap |= IXGBE_FEATURE_BYPASS;
if (sc->hw.device_id == IXGBE_DEV_ID_82599_QSFP_SF_QP)
sc->feat_cap &= ~IXGBE_FEATURE_LEGACY_IRQ;
break;
default:
break;
}
/* Enabled by default... */
/* Fan failure detection */
if (sc->feat_cap & IXGBE_FEATURE_FAN_FAIL)
sc->feat_en |= IXGBE_FEATURE_FAN_FAIL;
/* Netmap */
if (sc->feat_cap & IXGBE_FEATURE_NETMAP)
sc->feat_en |= IXGBE_FEATURE_NETMAP;
/* EEE */
if (sc->feat_cap & IXGBE_FEATURE_EEE)
sc->feat_en |= IXGBE_FEATURE_EEE;
/* Thermal Sensor */
if (sc->feat_cap & IXGBE_FEATURE_TEMP_SENSOR)
sc->feat_en |= IXGBE_FEATURE_TEMP_SENSOR;
/*
* Recovery mode:
* NetBSD: IXGBE_FEATURE_RECOVERY_MODE will be controlled after reading
* NVM Image version.
*/
/* Enabled via global sysctl... */
/* Flow Director */
if (ixgbe_enable_fdir) {
if (sc->feat_cap & IXGBE_FEATURE_FDIR)
sc->feat_en |= IXGBE_FEATURE_FDIR;
else
device_printf(sc->dev, "Device does not support "
"Flow Director. Leaving disabled.");
}
/* Legacy (single queue) transmit */
if ((sc->feat_cap & IXGBE_FEATURE_LEGACY_TX) &&
ixgbe_enable_legacy_tx)
sc->feat_en |= IXGBE_FEATURE_LEGACY_TX;
/*
* Message Signaled Interrupts - Extended (MSI-X)
* Normal MSI is only enabled if MSI-X calls fail.
*/
if (!ixgbe_enable_msix)
sc->feat_cap &= ~IXGBE_FEATURE_MSIX;
/* Receive-Side Scaling (RSS) */
if ((sc->feat_cap & IXGBE_FEATURE_RSS) && ixgbe_enable_rss)
sc->feat_en |= IXGBE_FEATURE_RSS;
/* Disable features with unmet dependencies... */
/* No MSI-X */
if (!(sc->feat_cap & IXGBE_FEATURE_MSIX)) {
sc->feat_cap &= ~IXGBE_FEATURE_RSS;
sc->feat_cap &= ~IXGBE_FEATURE_SRIOV;
sc->feat_en &= ~IXGBE_FEATURE_RSS;
sc->feat_en &= ~IXGBE_FEATURE_SRIOV;
}
} /* ixgbe_init_device_features */
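/*
 * feat_cap describes what the hardware/driver combination could do;
 * feat_en is the subset actually switched on. A hypothetical sanity
 * check of that invariant:
 */
#ifdef notyet
static inline bool
ixgbe_features_consistent(const struct ixgbe_softc *sc)
{
/* Nothing may be enabled that isn't also in the capability set */
return (sc->feat_en & ~sc->feat_cap) == 0;
}
#endif /* notyet */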
/************************************************************************
* ixgbe_probe - Device identification routine
*
* Determines whether the driver should attach to this
* adapter, based on its PCI vendor/device ID.
*
* return 1 on a supported device, 0 otherwise
************************************************************************/
static int
ixgbe_probe(device_t dev, cfdata_t cf, void *aux)
{
const struct pci_attach_args *pa = aux;
return (ixgbe_lookup(pa) != NULL) ? 1 : 0;
} /* ixgbe_probe */
/*
 * Use ratecheck() in case the interrupt occurs frequently.
 * When the EXPX9501AT's fan stopped, the interrupt occurred only once,
 * a red LED on the board turned on, and the link never came up until
 * power off.
 */
if (ratecheck(&sc->lasterr_time, &ixgbe_errlog_intrvl))
device_printf(sc->dev,
"\nCRITICAL: FAN FAILURE!! REPLACE IMMEDIATELY!!\n");
if (ifp->if_flags & IFF_RUNNING) {
IXGBE_TX_LOCK(txr);
more = ixgbe_txeof(txr);
if (!(sc->feat_en & IXGBE_FEATURE_LEGACY_TX))
if (!ixgbe_mq_ring_empty(ifp, txr->txr_interq))
ixgbe_mq_start_locked(ifp, txr);
/* Only for queue 0 */
/* NetBSD still needs this for CBQ */
if ((&sc->queues[0] == que)
&& (!ixgbe_legacy_ring_empty(ifp, NULL)))
ixgbe_legacy_start_locked(ifp, txr);
IXGBE_TX_UNLOCK(txr);
more |= ixgbe_rxeof(que);
}
if (more) {
IXGBE_EVC_ADD(&que->req, 1);
ixgbe_sched_handle_que(sc, que);
} else if (que->res != NULL) {
/* MSIX: Re-enable this interrupt */
ixgbe_enable_queue(sc, que->msix);
} else {
/* INTx or MSI */
ixgbe_enable_queue(sc, 0);
}
pc = sc->osdep.pc;
#ifdef RSS
/*
* If we're doing RSS, the number of queues needs to
* match the number of RSS buckets that are configured.
*
* + If there's more queues than RSS buckets, we'll end
* up with queues that get no traffic.
*
* + If there's more RSS buckets than queues, we'll end
* up having multiple RSS buckets map to the same queue,
* so there'll be some contention.
*/
rss_buckets = rss_getnumbuckets();
if ((sc->feat_en & IXGBE_FEATURE_RSS) &&
(sc->num_queues != rss_buckets)) {
device_printf(dev,
"%s: number of queues (%d) != number of RSS buckets (%d)"
"; performance will be impacted.\n",
__func__, sc->num_queues, rss_buckets);
}
#endif
/* Set the handler function */
que->res = sc->osdep.ihs[i] = pci_intr_establish_xname(pc,
sc->osdep.intrs[i], IPL_NET, ixgbe_msix_que, que,
intr_xname);
if (que->res == NULL) {
aprint_error_dev(dev,
"Failed to register QUE handler\n");
error = ENXIO;
goto err_out;
}
que->msix = vector;
sc->active_queues |= 1ULL << que->msix;
if (sc->feat_en & IXGBE_FEATURE_RSS) {
#ifdef RSS
/*
* The queue ID is used as the RSS layer bucket ID.
* We look up the queue ID -> RSS CPU ID and select
* that.
*/
cpu_id = rss_getcpu(i % rss_getnumbuckets());
CPU_SETOF(cpu_id, &cpu_mask);
#endif
} else {
/*
* Bind the MSI-X vector, and thus the
* rings to the corresponding CPU.
*
* This just happens to match the default RSS
* round-robin bucket -> queue -> CPU allocation.
*/
if (sc->num_queues > 1)
cpu_id = i;
}
/* Round-robin affinity */
kcpuset_zero(affinity);
kcpuset_set(affinity, cpu_id % ncpu);
error = interrupt_distribute(sc->osdep.ihs[i], affinity,
NULL);
aprint_normal_dev(dev, "for TX/RX, interrupting at %s",
intrstr);
if (error == 0) {
#if 1 /* def IXGBE_DEBUG */
#ifdef RSS
aprint_normal(", bound RSS bucket %d to CPU %d", i,
cpu_id % ncpu);
#else
aprint_normal(", bound queue %d to cpu %d", i,
cpu_id % ncpu);
#endif
#endif /* IXGBE_DEBUG */
}
aprint_normal("\n");
/************************************************************************
* ixgbe_configure_interrupts
*
* Setup MSI-X, MSI, or legacy interrupts (in that order).
* This will also depend on user settings.
************************************************************************/
static int
ixgbe_configure_interrupts(struct ixgbe_softc *sc)
{
device_t dev = sc->dev;
struct ixgbe_mac_info *mac = &sc->hw.mac;
int want, queues, msgs;
/* Default to 1 queue if MSI-X setup fails */
sc->num_queues = 1;
/* Override by tuneable */
if (!(sc->feat_cap & IXGBE_FEATURE_MSIX))
goto msi;
/*
 * NetBSD only: use single-vector MSI when the number of CPUs is 1,
 * to save an interrupt slot.
 */
if (ncpu == 1)
goto msi;
/* First try MSI-X */
msgs = pci_msix_count(sc->osdep.pc, sc->osdep.tag);
msgs = MIN(msgs, IXG_MAX_NINTR);
if (msgs < 2)
goto msi;
sc->feat_en |= IXGBE_FEATURE_MSIX;
/* Figure out a reasonable auto config value */
queues = (ncpu > (msgs - 1)) ? (msgs - 1) : ncpu;
#ifdef RSS
/* If we're doing RSS, clamp at the number of RSS buckets */
if (sc->feat_en & IXGBE_FEATURE_RSS)
queues = uimin(queues, rss_getnumbuckets());
#endif
if (ixgbe_num_queues > queues) {
aprint_error_dev(sc->dev,
"ixgbe_num_queues (%d) is too large, "
"using reduced amount (%d).\n", ixgbe_num_queues, queues);
ixgbe_num_queues = queues;
}
/*
* Want one vector (RX/TX pair) per queue
* plus an additional for Link.
*/
want = queues + 1;
if (msgs >= want)
msgs = want;
else {
aprint_error_dev(dev, "MSI-X Configuration Problem, "
"%d vectors but %d queues wanted!\n", msgs, want);
goto msi;
}
sc->num_queues = queues;
sc->feat_en |= IXGBE_FEATURE_MSIX;
return (0);
/*
 * MSI-X allocation failed or provided us with
 * fewer vectors than needed. Free MSI-X resources
 * and we'll try enabling MSI.
 */
msi:
/* Without MSI-X, some features are no longer supported */
sc->feat_cap &= ~IXGBE_FEATURE_RSS;
sc->feat_en &= ~IXGBE_FEATURE_RSS;
sc->feat_cap &= ~IXGBE_FEATURE_SRIOV;
sc->feat_en &= ~IXGBE_FEATURE_SRIOV;