/* $NetBSD: stg.c,v 1.6 2011/10/30 21:08:33 phx Exp $ */

/*-
* Copyright (c) 2011 Frank Wille.
* All rights reserved.
*
* Written by Frank Wille for The NetBSD Project.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

#include <sys/param.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>

#include <lib/libsa/stand.h>
#include <lib/libsa/net.h>

#include "globals.h"

/*
 * Chip register accessors.  Each takes the driver state pointer l and a
 * register offset r, which is added to l->csr.  The numeric suffix follows
 * the width of the underlying in8/in16rb/in32rb (out8/out16rb/out32rb)
 * primitive.
 * NOTE(review): "rb" presumably means byte-reversed access -- confirm
 * against the platform I/O primitives.
 */
#define CSR_WRITE_1(l, r, v)    out8((l)->csr+(r), (v))
#define CSR_READ_1(l, r)        in8((l)->csr+(r))
#define CSR_WRITE_2(l, r, v)    out16rb((l)->csr+(r), (v))
#define CSR_READ_2(l, r)        in16rb((l)->csr+(r))
#define CSR_WRITE_4(l, r, v)    out32rb((l)->csr+(r), (v))
#define CSR_READ_4(l, r)        in32rb((l)->csr+(r))
/* address conversions are plain casts to a 32-bit integer here */
#define VTOPHYS(va)             (uint32_t)(va)
#define DEVTOV(pa)              (uint32_t)(pa)
/*
 * Forward an address/size pair to _wbinv() and _inv().
 * NOTE(review): presumably data cache write-back+invalidate and
 * invalidate -- confirm against their definitions.
 */
#define wbinv(adr, siz)         _wbinv(VTOPHYS(adr), (uint32_t)(siz))
#define inv(adr, siz)           _inv(VTOPHYS(adr), (uint32_t)(siz))
/* busy-wait; callers in this file pass microseconds (1000 == 1 ms) */
#define DELAY(n)                delay(n)
/* allocate sizeof(T) bytes through allocaligned(), A being its 2nd argument */
#define ALLOC(T,A)              (T *)allocaligned(sizeof(T),(A))

/*
 * DMA descriptor shared with the chip; used for both the Tx and the Rx
 * list.  As used in this file:
 *   xd0   - address of the next descriptor in the list
 *   xd1   - control/status word (T1_xxx and R1_xxx bits below)
 *   xd2   - buffer address, with the buffer length in the bits from
 *           T2_LENSHIFT/R2_LENSHIFT upwards
 *   dummy - not referenced by this file
 * All words are stored little-endian (htole64/le64toh).
 */
struct desc {
       uint64_t xd0, xd1, xd2, dummy;
};
#define T1_EMPTY                (1U << 31)      /* no Tx frame available */
#define T1_NOALIGN              (3U << 16)      /* allow any Tx alignment */
#define T1_CNTSHIFT             24              /* Tx fragment count */
#define T2_LENSHIFT             48              /* Tx fragment length */
#define R1_DONE                 (1U << 31)      /* desc has a Rx frame */
#define R1_FL_MASK              0xffff          /* Rx frame length */
#define R1_ER_MASK              0x3f0000        /* Rx error indication */
#define R2_LENSHIFT             48              /* Rx fragment length */

/*
 * Chip register offsets (STGE_xxx, used with the CSR_READ/CSR_WRITE
 * macros) followed, indented by one extra space, by the bit definitions
 * belonging to the register named just above them.
 */
#define STGE_DMACtrl            0x00
#define  DMAC_RxDMAPollNow      (1U << 4)
#define  DMAC_TxDMAPollNow      (1U << 12)
#define STGE_TFDListPtrLo       0x10
#define STGE_TFDListPtrHi       0x14
#define STGE_RFDListPtrLo       0x1c
#define STGE_RFDListPtrHi       0x20
#define STGE_DebugCtrl          0x2c
#define STGE_AsicCtrl           0x30
#define  AC_PhyMedia            (1U << 7)
#define  AC_GlobalReset         (1U << 16)
#define  AC_RxReset             (1U << 17)
#define  AC_TxReset             (1U << 18)
#define  AC_DMA                 (1U << 19)
#define  AC_FIFO                (1U << 20)
#define  AC_Network             (1U << 21)
#define  AC_Host                (1U << 22)
#define  AC_AutoInit            (1U << 23)
#define  AC_RstOut              (1U << 24)
#define  AC_ResetBusy           (1U << 26)
#define STGE_EepromData         0x48
#define STGE_EepromCtrl         0x4a
#define  EC_EepromAddress(x)    ((x) & 0xff)
#define  EC_EepromOpcode(x)     ((x) << 8)
#define  EC_OP_RR               2
#define  EC_EepromBusy          (1U << 15)
#define STGE_IntEnable          0x5c
#define STGE_MACCtrl            0x6c
#define  MC_DuplexSelect        (1U << 5)
#define  MC_StatisticsDisable   (1U << 22)
#define  MC_TxEnable            (1U << 24)
#define  MC_RxEnable            (1U << 27)
/* PhyCtrl carries the bit-banged MII management lines and link status */
#define STGE_PhyCtrl            0x76
#define  PC_MgmtClk             (1U << 0)
#define  PC_MgmtData            (1U << 1)
#define  PC_MgmtDir             (1U << 2)
#define  PC_PhyDuplexPolarity   (1U << 3)
#define  PC_PhyDuplexStatus     (1U << 4)
#define  PC_PhyLnkPolarity      (1U << 5)
#define  PC_LinkSpeed(x)        (((x) >> 6) & 3)
#define  PC_LinkSpeed_Down      0
#define  PC_LinkSpeed_10        1
#define  PC_LinkSpeed_100       2
#define  PC_LinkSpeed_1000      3
#define STGE_StationAddress0    0x78
#define STGE_StationAddress1    0x7a
#define STGE_StationAddress2    0x7c
#define STGE_MaxFrameSize       0x84
#define STGE_ReceiveMode        0x88
#define  RM_ReceiveUnicast      (1U << 0)
#define  RM_ReceiveMulticast    (1U << 1)
#define  RM_ReceiveBroadcast    (1U << 2)
#define  RM_ReceiveAllFrames    (1U << 3)
#define  RM_ReceiveMulticastHash (1U << 4)
#define  RM_ReceiveIPMulticast  (1U << 5)

/* EEPROM word address of the first of the three station address words */
#define STGE_EEPROM_SA0         0x10

/* size of each Rx buffer; also written to STGE_MaxFrameSize */
#define FRAMESIZE       1536

/*
 * Per-device driver state.  stg_init() allocates it with 32-byte
 * alignment ("desc alignment") and hands the addresses of txd[] and
 * rxd[] to the chip.
 */
struct local {
       /* two-entry Tx and Rx descriptor lists, each linked into a ring */
       struct desc txd[2];
       struct desc rxd[2];
       /* one receive buffer per Rx descriptor */
       uint8_t rxstore[2][FRAMESIZE];
       /*
        * csr: base address of the chip registers
        * rx, tx: index of the descriptor to use next in rxd[] / txd[]
        * phy: MII address of the PHY found by mii_initphy()
        */
       unsigned csr, rx, tx, phy;
       /* most recently read PHY status and link partner ability values */
       uint16_t bmsr, anlpar;
       /* polarity bits of STGE_PhyCtrl, kept while bit-banging the MII */
       uint8_t phyctrl_saved;
};

/* forward declarations of the file-local helpers defined further below */
static void stg_reset(struct local *);
static int mii_read(struct local *, int, int);
static void mii_write(struct local *, int, int, int);
static void mii_initphy(struct local *);
static void mii_dealan(struct local *, unsigned);
static void mii_bitbang_sync(struct local *);
static void mii_bitbang_send(struct local *, uint32_t, int);
static void mii_bitbang_clk(struct local *, uint8_t);
static int eeprom_wait(struct local *);

/*
 * Probe routine: read the PCI ID register of the device selected by tag
 * and report whether it is a chip handled by this driver.
 * Returns 1 on a match and 0 otherwise.  The data argument is not used.
 */
int
stg_match(unsigned tag, void *data)
{
       unsigned id = pcicfgread(tag, PCI_ID_REG);

       /* ST1023, IP1000A */
       if (id == PCI_DEVICE(0x13f0, 0x1023))
               return 1;
       return 0;
}

/*
 * Attach routine.  Allocates and zeroes the driver state, locates the
 * register window, resets the chip, finds and resets the PHY, determines
 * the station address, builds the Tx/Rx descriptor rings, programs the
 * MAC, runs autonegotiation and finally enables transmitter and receiver.
 *
 * tag  - PCI tag of the device, passed to pcicfgread()
 * data - buffer that receives the 6-byte station address
 *
 * Returns the driver state pointer to be passed to stg_send(),
 * stg_recv() and stg_shutdown().
 */
void *
stg_init(unsigned tag, void *data)
{
       struct local *l;
       struct desc *txd, *rxd;
       uint8_t *en;
       unsigned i;
       uint32_t macctl, reg;
       /* station addresses regarded as "not programmed" */
       static uint8_t bad[2][6] = {
               { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
               { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }
       };

       l = ALLOC(struct local, 32);            /* desc alignment */
       memset(l, 0, sizeof(struct local));
       l->csr = DEVTOV(pcicfgread(tag, 0x14)); /* first try mem space */
       /* config register 0x14 reads zero: use 0x10 (bit 0 masked) instead */
       if (l->csr == 0)
               l->csr = DEVTOV(PCI_XIOBASE + (pcicfgread(tag, 0x10) & ~01));

       stg_reset(l);

       mii_initphy(l);

       /* read ethernet address */
       en = data;
       for (i = 0; i < 6; i++)
               en[i] = CSR_READ_1(l, STGE_StationAddress0 + i);

       /* the chip holds no usable address: fetch it from the EEPROM */
       if (memcmp(en, bad[0], 6) == 0 || memcmp(en, bad[1], 6) == 0) {
               uint16_t addr[3];

               /* three 16-bit EEPROM words starting at STGE_EEPROM_SA0 */
               for (i = 0; i < 3; i++) {
                       if (eeprom_wait(l) != 0)
                               printf("NIC: serial EEPROM is not ready!\n");
                       CSR_WRITE_2(l, STGE_EepromCtrl,
                           EC_EepromAddress(STGE_EEPROM_SA0 + i) |
                           EC_EepromOpcode(EC_OP_RR));
                       if (eeprom_wait(l) != 0)
                               printf("NIC: serial EEPROM read time out!\n");
                       addr[i] = le16toh(CSR_READ_2(l, STGE_EepromData));
               }
               (void)memcpy(en, addr, 6);

               /* try to read MAC from Flash, when EEPROM is empty/missing */
               if (memcmp(en, bad[0], 6) == 0 || memcmp(en, bad[1], 6) == 0)
                       read_mac_from_flash(en);

               /* set the station address now */
               for (i = 0; i < 6; i++)
                       CSR_WRITE_1(l, STGE_StationAddress0 + i, en[i]);
       }

       printf("MAC address %02x:%02x:%02x:%02x:%02x:%02x\n",
           en[0], en[1], en[2], en[3], en[4], en[5]);

       /* PHY registers 2 and 3 are dumped for debugging only */
       DPRINTF(("PHY %d (%04x.%04x)\n", l->phy,
           mii_read(l, l->phy, 2), mii_read(l, l->phy, 3)));

       /*
        * setup descriptors: both lists are two-entry rings; the Tx
        * descriptors start out marked T1_EMPTY, the Rx descriptors each
        * point at their rxstore[] buffer with its size at bit 48 and up
        */
       txd = &l->txd[0];
       txd[0].xd0 = htole64(VTOPHYS(&txd[1]));
       txd[0].xd1 = htole64(T1_EMPTY);
       txd[1].xd0 = htole64(VTOPHYS(&txd[0]));
       txd[1].xd1 = htole64(T1_EMPTY);
       rxd = &l->rxd[0];
       rxd[0].xd0 = htole64(VTOPHYS(&rxd[1]));
       rxd[0].xd2 = htole64(VTOPHYS(l->rxstore[0]) |
           ((uint64_t)FRAMESIZE << 48));
       rxd[1].xd0 = htole64(VTOPHYS(&rxd[0]));
       rxd[1].xd2 = htole64(VTOPHYS(l->rxstore[1]) |
           ((uint64_t)FRAMESIZE << 48));
       /* applied to the whole state, descriptors and Rx buffers included */
       wbinv(l, sizeof(struct local));

       /* interrupts stay disabled; stg_send() and stg_recv() poll */
       CSR_WRITE_2(l, STGE_IntEnable, 0);
       CSR_WRITE_2(l, STGE_ReceiveMode, RM_ReceiveUnicast |
           RM_ReceiveBroadcast | RM_ReceiveAllFrames | RM_ReceiveMulticast);
       CSR_WRITE_4(l, STGE_TFDListPtrHi, 0);
       CSR_WRITE_4(l, STGE_TFDListPtrLo, VTOPHYS(txd));
       CSR_WRITE_4(l, STGE_RFDListPtrHi, 0);
       CSR_WRITE_4(l, STGE_RFDListPtrLo, VTOPHYS(rxd));
       CSR_WRITE_2(l, STGE_MaxFrameSize, FRAMESIZE);
       CSR_WRITE_4(l, STGE_MACCtrl, 0);        /* do IFSSelect(0) first */
       /* written to STGE_MACCtrl at the very end, once duplex is known */
       macctl = MC_StatisticsDisable | MC_TxEnable | MC_RxEnable;

       if (PCI_REVISION(pcicfgread(tag, PCI_CLASS_REG)) >= 6) {
               /* some workarounds for revisions >= 6 */
               CSR_WRITE_2(l, STGE_DebugCtrl,
                   CSR_READ_2(l, STGE_DebugCtrl) | 0x0200);
               CSR_WRITE_2(l, STGE_DebugCtrl,
                   CSR_READ_2(l, STGE_DebugCtrl) | 0x0010);
               CSR_WRITE_2(l, STGE_DebugCtrl,
                   CSR_READ_2(l, STGE_DebugCtrl) | 0x0020);
       }

       /* auto negotiation, set the current media */
       mii_dealan(l, 5);

       /* report speed/duplex as the chip sees it; nothing printed if down */
       reg = CSR_READ_1(l, STGE_PhyCtrl);
       switch (PC_LinkSpeed(reg)) {
       case PC_LinkSpeed_1000:
               printf("1000Mbps");
               break;
       case PC_LinkSpeed_100:
               printf("100Mbps");
               break;
       case PC_LinkSpeed_10:
               printf("10Mbps");
               break;
       }
       if (reg & PC_PhyDuplexStatus) {
               macctl |= MC_DuplexSelect;
               printf("-FDX");
       }
       printf("\n");
       CSR_WRITE_4(l, STGE_MACCtrl, macctl);

       return l;
}

/*
 * Shut the interface down.  dev is the driver state returned by
 * stg_init().
 */
void
stg_shutdown(void *dev)
{
       struct local *sc = dev;

       /*
        * The chip must be reset once it is no longer needed, otherwise
        * bad things will happen (e.g. the DSM-G600 will no longer be
        * able to reboot).
        */
       stg_reset(sc);
}

int
stg_send(void *dev, char *buf, unsigned len)
{
       struct local *l = dev;
       volatile struct desc *txd;
       unsigned loop;

       wbinv(buf, len);
       txd = &l->txd[l->tx];
       txd->xd2 = htole64(VTOPHYS(buf) | ((uint64_t)len << 48));
       txd->xd1 = htole64(T1_NOALIGN | (1 << 24));
       wbinv(txd, sizeof(struct desc));
       CSR_WRITE_4(l, STGE_DMACtrl, DMAC_TxDMAPollNow);
       loop = 100;
       do {
               if ((le64toh(txd->xd1) & T1_EMPTY) != 0)
                       goto done;
               DELAY(10);
               inv(txd, sizeof(struct desc));
       } while (--loop > 0);
       printf("xmit failed\n");
       return -1;
 done:
       l->tx ^= 1;
       return len;
}

int
stg_recv(void *dev, char *buf, unsigned maxlen, unsigned timo)
{
       struct local *l = dev;
       volatile struct desc *rxd;
       uint32_t sts;
       unsigned bound, len;
       uint8_t *ptr;

       bound = 1000 * timo;
 again:
       rxd = &l->rxd[l->rx];
       do {
               inv(rxd, sizeof(struct desc));
               sts = (uint32_t)le64toh(rxd->xd1);
               if ((sts & R1_DONE) != 0)
                       goto gotone;
               DELAY(1000);    /* 1 milli second */
       } while (--bound > 0);
       errno = 0;
       return -1;
 gotone:
       if ((sts & R1_ER_MASK) != 0) {
               rxd->xd1 = 0;
               wbinv(rxd, sizeof(struct desc));
               l->rx ^= 1;
               goto again;
       }
       len = sts & R1_FL_MASK;
       if (len > maxlen)
               len = maxlen;
       ptr = l->rxstore[l->rx];
       inv(ptr, len);
       memcpy(buf, ptr, len);
       rxd->xd1 = 0;
       wbinv(rxd, sizeof(struct desc));
       l->rx ^= 1;
       return len;
}

static void
stg_reset(struct local *l)
{
       uint32_t reg;
       int i;

       reg = CSR_READ_4(l, STGE_AsicCtrl);
       CSR_WRITE_4(l, STGE_AsicCtrl, reg | AC_GlobalReset | AC_RxReset |
           AC_TxReset | AC_DMA | AC_FIFO | AC_Network | AC_Host |
           AC_AutoInit | ((reg & AC_PhyMedia) ? AC_RstOut : 0));
       DELAY(50000);
       for (i = 0; i < 1000; i++) {
               DELAY(5000);
               if ((CSR_READ_4(l, STGE_AsicCtrl) & AC_ResetBusy) == 0)
                       break;
       }
       if (i >= 1000)
               printf("NIC reset failed to complete!\n");
       DELAY(1000);
}

/*
 * Bit patterns placed at the front of the management frames that
 * mii_read() and mii_write() shift out to the PHY.
 */
#define R0110   6               /* 0110b read op */
#define W0101   5               /* 0101b write op */
#define A10     2               /* 10b ack turn around */

/* read the MII by bitbanging STGE_PhyCtrl */
static int
mii_read(struct local *l, int phy, int reg)
{
       unsigned data;
       int i;
       uint8_t v;

       /* initiate read access */
       data = (R0110 << 10) | (phy << 5) | reg;
       mii_bitbang_sync(l);
       mii_bitbang_send(l, data, 14); /* 4OP + 5PHY + 5REG */

       /* switch direction to PHY->host */
       v = l->phyctrl_saved;
       CSR_WRITE_1(l, STGE_PhyCtrl, v);

       /* read data */
       data = 0;
       for (i = 0; i < 18; i++) { /* 2TA + 16DATA */
               data <<= 1;
               data |= !!(CSR_READ_1(l, STGE_PhyCtrl) & PC_MgmtData);
               mii_bitbang_clk(l, v);
       }

       return data & 0xffff;
}

/*
 * Write val to a PHY register by bit-banging STGE_PhyCtrl.
 *
 * phy - PHY address, reg - register number, val - value to write.
 */
static void
mii_write(struct local *l, int phy, int reg, int val)
{
       unsigned header;

       /* 4 bits opcode, 5 bits PHY, 5 bits reg, 2 bits turn around */
       header = (W0101 << 28) | (phy << 23) | (reg << 18) | (A10 << 16);

       mii_bitbang_sync(l);
       /* the 16 data bits occupy the low half of the 32-bit frame */
       mii_bitbang_send(l, header | val, 32);
}

/*
 * PHY register numbers (MII_xxx) and, indented by one extra space, bit
 * masks within the register named just above them.  Not every mask
 * defined here is referenced by this file.
 */
#define MII_BMCR        0x00    /* Basic mode control register (rw) */
#define  BMCR_RESET     0x8000  /* reset */
#define  BMCR_AUTOEN    0x1000  /* autonegotiation enable */
#define  BMCR_ISO       0x0400  /* isolate */
#define  BMCR_STARTNEG  0x0200  /* restart autonegotiation */
#define MII_BMSR        0x01    /* Basic mode status register (ro) */
#define  BMSR_ACOMP     0x0020  /* Autonegotiation complete */
#define  BMSR_LINK      0x0004  /* Link status */
#define MII_ANAR        0x04    /* Autonegotiation advertisement (rw) */
#define  ANAR_FC        0x0400  /* local device supports PAUSE */
#define  ANAR_TX_FD     0x0100  /* local device supports 100bTx FD */
#define  ANAR_TX        0x0080  /* local device supports 100bTx */
#define  ANAR_10_FD     0x0040  /* local device supports 10bT FD */
#define  ANAR_10        0x0020  /* local device supports 10bT */
#define  ANAR_CSMA      0x0001  /* protocol selector CSMA/CD */
#define MII_ANLPAR      0x05    /* Autonegotiation link partner abilities (rw) */

static void
mii_initphy(struct local *l)
{
       int phy, ctl, sts, bound;

       l->phyctrl_saved = CSR_READ_1(l, STGE_PhyCtrl) &
           (PC_PhyDuplexPolarity | PC_PhyLnkPolarity);

       for (phy = 0; phy < 32; phy++) {
               ctl = mii_read(l, phy, MII_BMCR);
               sts = mii_read(l, phy, MII_BMSR);
               if (ctl != 0xffff && sts != 0xffff && sts != 0)
                       goto found;
       }
       printf("MII: no PHY found\n");
       return;

 found:
       ctl = mii_read(l, phy, MII_BMCR);
       mii_write(l, phy, MII_BMCR, ctl | BMCR_RESET);

       bound = 100;
       do {
               DELAY(10);
               ctl = mii_read(l, phy, MII_BMCR);
               if (ctl == 0xffff) {
                       printf("MII: PHY %d has died after reset\n", phy);
                       return;
               }
       } while (bound-- > 0 && (ctl & BMCR_RESET));
       if (bound == 0)
               printf("PHY %d reset failed\n", phy);

       ctl &= ~BMCR_ISO;
       mii_write(l, phy, MII_BMCR, ctl);
       sts = mii_read(l, phy, MII_BMSR) |
           mii_read(l, phy, MII_BMSR); /* read twice */
       l->phy = phy;
       l->bmsr = sts;
}

static void
mii_dealan(struct local *l, unsigned timo)
{
       unsigned anar, bound;

       anar = ANAR_TX_FD | ANAR_TX | ANAR_10_FD | ANAR_10 | ANAR_CSMA;
       mii_write(l, l->phy, MII_ANAR, anar);
       mii_write(l, l->phy, MII_BMCR, BMCR_AUTOEN | BMCR_STARTNEG);
       l->anlpar = 0;
       bound = getsecs() + timo;
       do {
               l->bmsr = mii_read(l, l->phy, MII_BMSR) |
                  mii_read(l, l->phy, MII_BMSR); /* read twice */
               if ((l->bmsr & BMSR_LINK) && (l->bmsr & BMSR_ACOMP)) {
                       l->anlpar = mii_read(l, l->phy, MII_ANLPAR);
                       break;
               }
               DELAY(10 * 1000);
       } while (getsecs() < bound);
}

/*
 * Send the preamble of a management frame: drive the data line high in
 * host->PHY direction and issue 32 clock pulses.
 */
static void
mii_bitbang_sync(struct local *l)
{
       uint8_t idle;
       int clocks;

       idle = l->phyctrl_saved | PC_MgmtDir | PC_MgmtData;
       CSR_WRITE_1(l, STGE_PhyCtrl, idle);
       DELAY(1);

       clocks = 32;
       while (clocks-- > 0)
               mii_bitbang_clk(l, idle);
}

/*
 * Shift the low nbits bits of data out to the PHY, most significant bit
 * first, one clock pulse per bit.
 *
 * nbits must be in the range 1..32; the callers in this file pass 14
 * (mii_read) and 32 (mii_write).
 */
static void
mii_bitbang_send(struct local *l, uint32_t data, int nbits)
{
       uint32_t i;
       uint8_t v;

       /* host->PHY direction */
       v = l->phyctrl_saved | PC_MgmtDir;
       CSR_WRITE_1(l, STGE_PhyCtrl, v);
       DELAY(1);
       /*
        * The mask has to be built from an unsigned 1: for nbits == 32 a
        * signed "1 << 31" would shift into the sign bit, which is
        * undefined behaviour.
        */
       for (i = (uint32_t)1 << (nbits - 1); i != 0; i >>= 1) {
               if (data & i)
                       v |= PC_MgmtData;
               else
                       v &= ~PC_MgmtData;
               /* present the bit on the data line, then clock it out */
               CSR_WRITE_1(l, STGE_PhyCtrl, v);
               DELAY(1);
               mii_bitbang_clk(l, v);
       }
}

/*
 * Issue one management clock pulse: write v to STGE_PhyCtrl with
 * PC_MgmtClk set, then v itself, each followed by a 1 microsecond delay.
 */
static void
mii_bitbang_clk(struct local *l, uint8_t v)
{
       uint8_t clk_high = v | PC_MgmtClk;

       CSR_WRITE_1(l, STGE_PhyCtrl, clk_high);
       DELAY(1);

       CSR_WRITE_1(l, STGE_PhyCtrl, v);
       DELAY(1);
}

/*
 * Wait for the serial EEPROM interface to become idle, checking
 * EC_EepromBusy up to 1000 times at 1 millisecond intervals.
 * Returns 0 once the busy bit is clear and 1 if it never cleared.
 */
static int
eeprom_wait(struct local *l)
{
       int tries = 1000;

       while (tries-- > 0) {
               DELAY(1000);
               if (!(CSR_READ_2(l, STGE_EepromCtrl) & EC_EepromBusy))
                       return 0;
       }
       return 1;
}