/*      $NetBSD: cpu_subr.c,v 1.111 2025/02/17 11:56:56 jmcneill Exp $  */

/*-
* Copyright (c) 2001 Matt Thomas.
* Copyright (c) 2001 Tsubai Masanari.
* Copyright (c) 1998, 1999, 2001 Internet Research Institute, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
*    must display the following acknowledgement:
*      This product includes software developed by
*      Internet Research Institute, Inc.
* 4. The name of the author may not be used to endorse or promote products
*    derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cpu_subr.c,v 1.111 2025/02/17 11:56:56 jmcneill Exp $");

#include "sysmon_envsys.h"

#ifdef _KERNEL_OPT
#include "opt_altivec.h"
#include "opt_multiprocessor.h"
#include "opt_ppcarch.h"
#include "opt_ppccache.h"
#include "opt_ppcparam.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/types.h>
#include <sys/lwp.h>
#include <sys/xcall.h>

#include <uvm/uvm.h>

#include <powerpc/pcb.h>
#include <powerpc/psl.h>
#include <powerpc/spr.h>
#include <powerpc/oea/hid.h>
#include <powerpc/oea/hid_601.h>
#include <powerpc/oea/spr.h>
#include <powerpc/oea/cpufeat.h>

#include <dev/sysmon/sysmonvar.h>

static void cpu_enable_l2cr(register_t);
static void cpu_enable_l3cr(register_t);
static void cpu_config_l2cr(int);
static void cpu_config_l3cr(int);
static void cpu_probe_speed(struct cpu_info *);
static void cpu_idlespin(void);
static void cpu_set_dfs_xcall(void *, void *);
#if NSYSMON_ENVSYS > 0
static void cpu_tau_setup(struct cpu_info *);
static void cpu_tau_refresh(struct sysmon_envsys *, envsys_data_t *);
#endif

extern void init_scom_speedctl(void);

int cpu = -1;
int ncpus;

struct fmttab {
       register_t fmt_mask;
       register_t fmt_value;
       const char *fmt_string;
};

/*
* This should be one per CPU but since we only support it on 750 variants it
* doesn't really matter since none of them support SMP
*/
envsys_data_t sensor;

static const struct fmttab cpu_7450_l2cr_formats[] = {
       { L2CR_L2E, 0, " disabled" },
       { L2CR_L2DO|L2CR_L2IO, L2CR_L2DO, " data-only" },
       { L2CR_L2DO|L2CR_L2IO, L2CR_L2IO, " instruction-only" },
       { L2CR_L2DO|L2CR_L2IO, L2CR_L2DO|L2CR_L2IO, " locked" },
       { L2CR_L2E, ~0, " 256KB L2 cache" },
       { L2CR_L2PE, 0, " no parity" },
       { L2CR_L2PE, L2CR_L2PE, " parity enabled" },
       { 0, 0, NULL }
};

static const struct fmttab cpu_7448_l2cr_formats[] = {
       { L2CR_L2E, 0, " disabled" },
       { L2CR_L2DO|L2CR_L2IO, L2CR_L2DO, " data-only" },
       { L2CR_L2DO|L2CR_L2IO, L2CR_L2IO, " instruction-only" },
       { L2CR_L2DO|L2CR_L2IO, L2CR_L2DO|L2CR_L2IO, " locked" },
       { L2CR_L2E, ~0, " 1MB L2 cache" },
       { L2CR_L2PE, 0, " no parity" },
       { L2CR_L2PE, L2CR_L2PE, " parity enabled" },
       { 0, 0, NULL }
};

static const struct fmttab cpu_7457_l2cr_formats[] = {
       { L2CR_L2E, 0, " disabled" },
       { L2CR_L2DO|L2CR_L2IO, L2CR_L2DO, " data-only" },
       { L2CR_L2DO|L2CR_L2IO, L2CR_L2IO, " instruction-only" },
       { L2CR_L2DO|L2CR_L2IO, L2CR_L2DO|L2CR_L2IO, " locked" },
       { L2CR_L2E, ~0, " 512KB L2 cache" },
       { L2CR_L2PE, 0, " no parity" },
       { L2CR_L2PE, L2CR_L2PE, " parity enabled" },
       { 0, 0, NULL }
};

static const struct fmttab cpu_7450_l3cr_formats[] = {
       { L3CR_L3DO|L3CR_L3IO, L3CR_L3DO, " data-only" },
       { L3CR_L3DO|L3CR_L3IO, L3CR_L3IO, " instruction-only" },
       { L3CR_L3DO|L3CR_L3IO, L3CR_L3DO|L3CR_L3IO, " locked" },
       { L3CR_L3SIZ, L3SIZ_2M, " 2MB" },
       { L3CR_L3SIZ, L3SIZ_1M, " 1MB" },
       { L3CR_L3PE|L3CR_L3APE, L3CR_L3PE|L3CR_L3APE, " parity" },
       { L3CR_L3PE|L3CR_L3APE, L3CR_L3PE, " data-parity" },
       { L3CR_L3PE|L3CR_L3APE, L3CR_L3APE, " address-parity" },
       { L3CR_L3PE|L3CR_L3APE, 0, " no-parity" },
       { L3CR_L3SIZ, ~0, " L3 cache" },
       { L3CR_L3RT, L3RT_MSUG2_DDR, " (DDR SRAM)" },
       { L3CR_L3RT, L3RT_PIPELINE_LATE, " (LW SRAM)" },
       { L3CR_L3RT, L3RT_PB2_SRAM, " (PB2 SRAM)" },
       { L3CR_L3CLK, ~0, " at" },
       { L3CR_L3CLK, L3CLK_20, " 2:1" },
       { L3CR_L3CLK, L3CLK_25, " 2.5:1" },
       { L3CR_L3CLK, L3CLK_30, " 3:1" },
       { L3CR_L3CLK, L3CLK_35, " 3.5:1" },
       { L3CR_L3CLK, L3CLK_40, " 4:1" },
       { L3CR_L3CLK, L3CLK_50, " 5:1" },
       { L3CR_L3CLK, L3CLK_60, " 6:1" },
       { L3CR_L3CLK, ~0, " ratio" },
       { 0, 0, NULL },
};

static const struct fmttab cpu_ibm750cl_l2cr_formats[] = {
       { L2CR_L2E, 0, " disabled" },
       { L2CR_L2DO|L2CR_L2IO, L2CR_L2DO, " data-only" },
       { L2CR_L2DO|L2CR_L2IO, L2CR_L2IO, " instruction-only" },
       { L2CR_L2DO|L2CR_L2IO, L2CR_L2DO|L2CR_L2IO, " locked" },
       { 0, ~0, " 256KB" },
       { L2CR_L2WT, L2CR_L2WT, " WT" },
       { L2CR_L2WT, 0, " WB" },
       { L2CR_L2PE, L2CR_L2PE, " with ECC" },
       { 0, ~0, " L2 cache" },
       { 0, 0, NULL }
};

static const struct fmttab cpu_ibm750_l2cr_formats[] = {
       { L2CR_L2E, 0, " disabled" },
       { L2CR_L2DO|L2CR_L2IO, L2CR_L2DO, " data-only" },
       { L2CR_L2DO|L2CR_L2IO, L2CR_L2IO, " instruction-only" },
       { L2CR_L2DO|L2CR_L2IO, L2CR_L2DO|L2CR_L2IO, " locked" },
       { 0, ~0, " 512KB" },
       { L2CR_L2WT, L2CR_L2WT, " WT" },
       { L2CR_L2WT, 0, " WB" },
       { L2CR_L2PE, L2CR_L2PE, " with ECC" },
       { 0, ~0, " L2 cache" },
       { 0, 0, NULL }
};

static const struct fmttab cpu_l2cr_formats[] = {
       { L2CR_L2E, 0, " disabled" },
       { L2CR_L2DO|L2CR_L2IO, L2CR_L2DO, " data-only" },
       { L2CR_L2DO|L2CR_L2IO, L2CR_L2IO, " instruction-only" },
       { L2CR_L2DO|L2CR_L2IO, L2CR_L2DO|L2CR_L2IO, " locked" },
       { L2CR_L2PE, L2CR_L2PE, " parity" },
       { L2CR_L2PE, 0, " no-parity" },
       { L2CR_L2SIZ, L2SIZ_2M, " 2MB" },
       { L2CR_L2SIZ, L2SIZ_1M, " 1MB" },
       { L2CR_L2SIZ, L2SIZ_512K, " 512KB" },
       { L2CR_L2SIZ, L2SIZ_256K, " 256KB" },
       { L2CR_L2WT, L2CR_L2WT, " WT" },
       { L2CR_L2WT, 0, " WB" },
       { L2CR_L2E, ~0, " L2 cache" },
       { L2CR_L2RAM, L2RAM_FLOWTHRU_BURST, " (FB SRAM)" },
       { L2CR_L2RAM, L2RAM_PIPELINE_LATE, " (LW SRAM)" },
       { L2CR_L2RAM, L2RAM_PIPELINE_BURST, " (PB SRAM)" },
       { L2CR_L2CLK, ~0, " at" },
       { L2CR_L2CLK, L2CLK_10, " 1:1" },
       { L2CR_L2CLK, L2CLK_15, " 1.5:1" },
       { L2CR_L2CLK, L2CLK_20, " 2:1" },
       { L2CR_L2CLK, L2CLK_25, " 2.5:1" },
       { L2CR_L2CLK, L2CLK_30, " 3:1" },
       { L2CR_L2CLK, L2CLK_35, " 3.5:1" },
       { L2CR_L2CLK, L2CLK_40, " 4:1" },
       { L2CR_L2CLK, ~0, " ratio" },
       { 0, 0, NULL }
};

static void cpu_fmttab_print(const struct fmttab *, register_t);

struct cputab {
       const char name[8];
       uint16_t version;
       uint16_t revfmt;
};
#define REVFMT_MAJMIN   1               /* %u.%u */
#define REVFMT_HEX      2               /* 0x%04x */
#define REVFMT_DEC      3               /* %u */
static const struct cputab models[] = {
       { "601",        MPC601,         REVFMT_DEC },
       { "602",        MPC602,         REVFMT_DEC },
       { "603",        MPC603,         REVFMT_MAJMIN },
       { "603e",       MPC603e,        REVFMT_MAJMIN },
       { "603ev",      MPC603ev,       REVFMT_MAJMIN },
       { "G2",         MPCG2,          REVFMT_MAJMIN },
       { "604",        MPC604,         REVFMT_MAJMIN },
       { "604e",       MPC604e,        REVFMT_MAJMIN },
       { "604ev",      MPC604ev,       REVFMT_MAJMIN },
       { "620",        MPC620,         REVFMT_HEX },
       { "750",        MPC750,         REVFMT_MAJMIN },
       { "750FX",      IBM750FX,       REVFMT_MAJMIN },
       { "750GX",      IBM750GX,       REVFMT_MAJMIN },
       { "7400",       MPC7400,        REVFMT_MAJMIN },
       { "7410",       MPC7410,        REVFMT_MAJMIN },
       { "7450",       MPC7450,        REVFMT_MAJMIN },
       { "7455",       MPC7455,        REVFMT_MAJMIN },
       { "7457",       MPC7457,        REVFMT_MAJMIN },
       { "7447A",      MPC7447A,       REVFMT_MAJMIN },
       { "7448",       MPC7448,        REVFMT_MAJMIN },
       { "8240",       MPC8240,        REVFMT_MAJMIN },
       { "8245",       MPC8245,        REVFMT_MAJMIN },
       { "970",        IBM970,         REVFMT_MAJMIN },
       { "970FX",      IBM970FX,       REVFMT_MAJMIN },
       { "970MP",      IBM970MP,       REVFMT_MAJMIN },
       { "POWER3II",   IBMPOWER3II,    REVFMT_MAJMIN },
       { "",           0,              REVFMT_HEX }
};

#include <powerpc/oea/bat.h>
extern struct bat battable[];

#ifdef MULTIPROCESSOR
struct cpu_info cpu_info[CPU_MAXNUM] = {
   [0] = {
       .ci_curlwp = &lwp0,
       .ci_battable = battable,
   },
};
volatile struct cpu_hatch_data *cpu_hatch_data;
volatile int cpu_hatch_stack;
#define HATCH_STACK_SIZE 0x1000
extern int ticks_per_intr;
#include <powerpc/pic/picvar.h>
#include <powerpc/pic/ipivar.h>
#else
struct cpu_info cpu_info[1] = {
   [0] = {
       .ci_curlwp = &lwp0,
       .ci_battable = battable,
   },
};
#endif /*MULTIPROCESSOR*/

int cpu_altivec;
register_t cpu_psluserset;
register_t cpu_pslusermod;
register_t cpu_pslusermask = 0xffff;

unsigned long oeacpufeat;

void
cpu_features_probe(void)
{
       static bool feature_probe_done;

       u_int pvr, vers;

       if (feature_probe_done) {
               return;
       }

       pvr = mfpvr();
       vers = pvr >> 16;

       if ((vers >= IBMRS64II && vers <= IBM970GX) || vers == MPC620 ||
           vers == IBMCELL || vers == IBMPOWER6P5) {
               oeacpufeat |= OEACPU_64;
               oeacpufeat |= OEACPU_64_BRIDGE;
               oeacpufeat |= OEACPU_NOBAT;

       } else if (vers == MPC601) {
               oeacpufeat |= OEACPU_601;

       } else if (MPC745X_P(vers)) {
               if (vers != MPC7450) {
                       /* Enable more SPRG registers */
                       oeacpufeat |= OEACPU_HIGHSPRG;

                       /* Enable more BAT registers */
                       oeacpufeat |= OEACPU_HIGHBAT;

                       /* Enable larger BAT registers */
                       oeacpufeat |= OEACPU_XBSEN;
               }

       } else if (vers == IBM750FX || vers == IBM750GX) {
               oeacpufeat |= OEACPU_HIGHBAT;
       }

       feature_probe_done = true;
}

void
cpu_features_enable(void)
{
       static bool feature_enable_done;

       if (feature_enable_done) {
               return;
       }

       u_int pvr, vers;

       pvr = mfpvr();
       vers = pvr >> 16;

       if (MPC745X_P(vers)) {
               register_t hid0 = mfspr(SPR_HID0);
               register_t hid1 = mfspr(SPR_HID1);

               const register_t ohid0 = hid0;

               if (oeacpufeat & OEACPU_HIGHBAT) {
                       hid0 |= HID0_HIGH_BAT_EN;
               }

               if (oeacpufeat & OEACPU_XBSEN) {
                       hid0 |= HID0_XBSEN;
               }

               if (hid0 != ohid0) {
                       mtspr(SPR_HID0, hid0);
                       __asm volatile("sync;isync");
               }

               /* Enable address broadcasting for MP systems */
               hid1 |= HID1_SYNCBE | HID1_ABE;

               mtspr(SPR_HID1, hid1);
               __asm volatile("sync;isync");
       }

       feature_enable_done = true;
}

/* This is to be called from locore.S, and nowhere else. */

void
cpu_model_init(void)
{
       /*
        * This is just a wrapper for backwards-compatibility, and will
        * probably be garbage-collected in the near future.
        */
       cpu_features_probe();
       cpu_features_enable();
}

void
cpu_fmttab_print(const struct fmttab *fmt, register_t data)
{
       for (; fmt->fmt_mask != 0 || fmt->fmt_value != 0; fmt++) {
               if ((~fmt->fmt_mask & fmt->fmt_value) != 0 ||
                   (data & fmt->fmt_mask) == fmt->fmt_value)
                       aprint_normal("%s", fmt->fmt_string);
       }
}

void
cpu_idlespin(void)
{
       register_t msr;

       if (powersave <= 0)
               return;

#if defined(_ARCH_PPC64) || defined (PPC_OEA64_BRIDGE)
       if (cpu_altivec)
               __asm volatile("dssall");
#endif

       __asm volatile(
               "sync;"
               "mfmsr  %0;"
               "oris   %0,%0,%1@h;"    /* enter power saving mode */
               "mtmsr  %0;"
               "isync;"
           :   "=r"(msr)
           :   "J"(PSL_POW));
}

void
cpu_probe_cache(void)
{
       u_int assoc, pvr, vers;

       pvr = mfpvr();
       vers = pvr >> 16;


       /* Presently common across almost all implementations. */
       curcpu()->ci_ci.dcache_line_size = 32;
       curcpu()->ci_ci.icache_line_size = 32;


       switch (vers) {
#define K       *1024
       case IBM750FX:
       case IBM750GX:
       case MPC601:
       case MPC750:
       case MPC7400:
       case MPC7447A:
       case MPC7448:
       case MPC7450:
       case MPC7455:
       case MPC7457:
               curcpu()->ci_ci.dcache_size = 32 K;
               curcpu()->ci_ci.icache_size = 32 K;
               assoc = 8;
               break;
       case MPC603:
               curcpu()->ci_ci.dcache_size = 8 K;
               curcpu()->ci_ci.icache_size = 8 K;
               assoc = 2;
               break;
       case MPC603e:
       case MPC603ev:
       case MPC604:
       case MPC8240:
       case MPC8245:
       case MPCG2:
               curcpu()->ci_ci.dcache_size = 16 K;
               curcpu()->ci_ci.icache_size = 16 K;
               assoc = 4;
               break;
       case MPC604e:
       case MPC604ev:
               curcpu()->ci_ci.dcache_size = 32 K;
               curcpu()->ci_ci.icache_size = 32 K;
               assoc = 4;
               break;
       case IBMPOWER3II:
               curcpu()->ci_ci.dcache_size = 64 K;
               curcpu()->ci_ci.icache_size = 32 K;
               curcpu()->ci_ci.dcache_line_size = 128;
               curcpu()->ci_ci.icache_line_size = 128;
               assoc = 128; /* not a typo */
               break;
       case IBM970:
       case IBM970FX:
       case IBM970MP:
               curcpu()->ci_ci.dcache_size = 32 K;
               curcpu()->ci_ci.icache_size = 64 K;
               curcpu()->ci_ci.dcache_line_size = 128;
               curcpu()->ci_ci.icache_line_size = 128;
               assoc = 2;
               break;

       default:
               curcpu()->ci_ci.dcache_size = PAGE_SIZE;
               curcpu()->ci_ci.icache_size = PAGE_SIZE;
               assoc = 1;
#undef  K
       }

       /*
        * Possibly recolor.
        */
       uvm_page_recolor(atop(curcpu()->ci_ci.dcache_size / assoc));
}

struct cpu_info *
cpu_attach_common(device_t self, int id)
{
       struct cpu_info *ci;
       u_int pvr, vers;

       ci = &cpu_info[id];
#ifndef MULTIPROCESSOR
       /*
        * If this isn't the primary CPU, print an error message
        * and just bail out.
        */
       if (id != 0) {
               aprint_naive("\n");
               aprint_normal(": ID %d\n", id);
               aprint_normal_dev(self,
                   "processor off-line; "
                   "multiprocessor support not present in kernel\n");
               return (NULL);
       }
#endif

       ci->ci_cpuid = id;
       ci->ci_idepth = -1;
       ci->ci_dev = self;
       ci->ci_idlespin = cpu_idlespin;

#ifdef MULTIPROCESSOR
       /* Register IPI Interrupt */
       if ((ipiops.ppc_establish_ipi) && (id == 0))
               ipiops.ppc_establish_ipi(IST_LEVEL, IPL_HIGH, NULL);
#endif

       pvr = mfpvr();
       vers = (pvr >> 16) & 0xffff;

       switch (id) {
       case 0:
               /* load my cpu_number to PIR */
               switch (vers) {
               case MPC601:
               case MPC604:
               case MPC604e:
               case MPC604ev:
               case MPC7400:
               case MPC7410:
               case MPC7447A:
               case MPC7448:
               case MPC7450:
               case MPC7455:
               case MPC7457:
                       mtspr(SPR_PIR, id);
               }
               cpu_setup(self, ci);
               break;
       default:
               aprint_naive("\n");
               if (id >= CPU_MAXNUM) {
                       aprint_normal(": more than %d cpus?\n", CPU_MAXNUM);
                       panic("cpuattach");
               }
#ifndef MULTIPROCESSOR
               aprint_normal(" not configured\n");
               return NULL;
#else
               mi_cpu_attach(ci);
               break;
#endif
       }
       return (ci);
}

void
cpu_setup(device_t self, struct cpu_info *ci)
{
       u_int pvr, vers;
       const char * const xname = device_xname(self);
       const char *bitmask;
       char hidbuf[128];
       char model[80];
#if defined(PPC_OEA64_BRIDGE) || defined(_ARCH_PPC64)
       char hidbuf_u[128];
       const char *bitmasku = NULL;
       volatile uint64_t hid64_0, hid64_0_save;
#endif
#if !defined(_ARCH_PPC64)
       register_t hid0 = 0, hid0_save = 0;
#endif

       pvr = mfpvr();
       vers = (pvr >> 16) & 0xffff;

       cpu_identify(model, sizeof(model));
       aprint_naive("\n");
       aprint_normal(": %s, ID %d%s\n", model,  cpu_number(),
           cpu_number() == 0 ? " (primary)" : "");

       /* set the cpu number */
       ci->ci_cpuid = cpu_number();
#if defined(_ARCH_PPC64)
       __asm volatile("mfspr %0,%1" : "=r"(hid64_0) : "K"(SPR_HID0));
       hid64_0_save = hid64_0;
#else
#if defined(PPC_OEA64_BRIDGE)
       if ((oeacpufeat & OEACPU_64_BRIDGE) != 0)
               hid64_0_save = hid64_0 = mfspr(SPR_HID0);
       else
#endif
               hid0_save = hid0 = mfspr(SPR_HID0);
#endif


       cpu_probe_cache();

       /*
        * Configure power-saving mode.
        */
       switch (vers) {
#if !defined(_ARCH_PPC64)
       case MPC604:
       case MPC604e:
       case MPC604ev:
               /*
                * Do not have HID0 support settings, but can support
                * MSR[POW] off
                */
               powersave = 1;
               break;

       case MPC603:
       case MPC603e:
       case MPC603ev:
       case MPC7400:
       case MPC7410:
       case MPC8240:
       case MPC8245:
       case MPCG2:
               /* Select DOZE mode. */
               hid0 &= ~(HID0_DOZE | HID0_NAP | HID0_SLEEP);
               hid0 |= HID0_DOZE | HID0_DPM;
               powersave = 1;
               break;

       case MPC750:
       case IBM750FX:
       case IBM750GX:
               /* Select NAP mode. */
               hid0 &= ~(HID0_DOZE | HID0_NAP | HID0_SLEEP);
               hid0 |= HID0_NAP | HID0_DPM;
               powersave = 1;
               break;

       case MPC7447A:
       case MPC7448:
       case MPC7457:
       case MPC7455:
       case MPC7450:
               /* Enable the 7450 branch caches */
               hid0 |= HID0_SGE | HID0_BTIC;
               hid0 |= HID0_LRSTK | HID0_FOLD | HID0_BHT;
               /* Disable BTIC on 7450 Rev 2.0 or earlier */
               if (vers == MPC7450 && (pvr & 0xFFFF) <= 0x0200)
                       hid0 &= ~HID0_BTIC;
               /* Select NAP mode. */
               hid0 &= ~HID0_SLEEP;
               /* XXX my quicksilver hangs if nap is enabled */
               if (vers != MPC7450) {
                       hid0 |= HID0_NAP | HID0_DPM;
                       powersave = 1;
               }
               break;
#endif

       case IBM970:
       case IBM970FX:
       case IBM970MP:
#if defined(_ARCH_PPC64) || defined (PPC_OEA64_BRIDGE)
#if !defined(_ARCH_PPC64)
               KASSERT((oeacpufeat & OEACPU_64_BRIDGE) != 0);
#endif
               hid64_0 &= ~(HID0_64_DOZE | HID0_64_NAP | HID0_64_DEEPNAP);
               hid64_0 |= HID0_64_NAP | HID0_64_DPM | HID0_64_EX_TBEN |
                          HID0_64_TB_CTRL | HID0_64_EN_MCHK;
               powersave = 1;
               break;
#endif
       case IBMPOWER3II:
       default:
               /* No power-saving mode is available. */ ;
       }

#ifdef NAPMODE
       switch (vers) {
       case IBM750FX:
       case IBM750GX:
       case MPC750:
       case MPC7400:
               /* Select NAP mode. */
               hid0 &= ~(HID0_DOZE | HID0_NAP | HID0_SLEEP);
               hid0 |= HID0_NAP;
               break;
       }
#endif

       switch (vers) {
       case IBM750FX:
       case IBM750GX:
       case MPC750:
               hid0 &= ~HID0_DBP;              /* XXX correct? */
               hid0 |= HID0_EMCP | HID0_BTIC | HID0_SGE | HID0_BHT;
               break;

       case MPC7400:
       case MPC7410:
               hid0 &= ~HID0_SPD;
               hid0 |= HID0_EMCP | HID0_BTIC | HID0_SGE | HID0_BHT;
               hid0 |= HID0_EIEC;
               break;
       }

       /*
        * according to the 603e manual this is necessary for an external L2
        * cache to work properly
        */
       switch (vers) {
       case MPC603e:
               hid0 |= HID0_ABE;
       }

#if defined(_ARCH_PPC64) || defined(PPC_OEA64_BRIDGE)
#if defined(PPC_OEA64_BRIDGE)
       if ((oeacpufeat & OEACPU_64_BRIDGE) != 0) {
#endif
               if (hid64_0 != hid64_0_save) {
                       mtspr64(SPR_HID0, hid64_0);
               }
#if defined(PPC_OEA64_BRIDGE)
       } else {
#endif
#endif

#if !defined(_ARCH_PPC64)
               if (hid0 != hid0_save) {
                       mtspr(SPR_HID0, hid0);
                       __asm volatile("sync;isync");
               }
#endif
#if defined(PPC_OEA64_BRIDGE)
       }
#endif

       switch (vers) {
       case MPC601:
               bitmask = HID0_601_BITMASK;
               break;
       case MPC7447A:
       case MPC7448:
       case MPC7450:
       case MPC7455:
       case MPC7457:
               bitmask = HID0_7450_BITMASK;
               break;
       case IBM970:
       case IBM970FX:
       case IBM970MP:
               bitmask = HID0_970_BITMASK;
#if defined(PPC_OEA64_BRIDGE) || defined(_ARCH_PPC64)
               bitmasku = HID0_970_BITMASK_U;
#endif
               break;
       default:
               bitmask = HID0_BITMASK;
               break;
       }

#if defined(PPC_OEA64_BRIDGE) || defined(_ARCH_PPC64)
       if (bitmasku != NULL) {
               snprintb(hidbuf, sizeof hidbuf, bitmask, hid64_0 & 0xffffffff);
               snprintb(hidbuf_u, sizeof hidbuf_u, bitmasku, hid64_0 >> 32);
               aprint_normal_dev(self, "HID0 %s %s, powersave: %d\n",
                   hidbuf_u, hidbuf, powersave);
       } else
#endif
       {
               snprintb(hidbuf, sizeof hidbuf, bitmask, hid0);
               aprint_normal_dev(self, "HID0 %s, powersave: %d\n",
                   hidbuf, powersave);
       }

       ci->ci_khz = 0;

       /*
        * Display speed and cache configuration.
        */
       switch (vers) {
       case MPC604:
       case MPC604e:
       case MPC604ev:
       case MPC750:
       case IBM750FX:
       case IBM750GX:
       case MPC7400:
       case MPC7410:
       case MPC7447A:
       case MPC7448:
       case MPC7450:
       case MPC7455:
       case MPC7457:
               aprint_normal_dev(self, "");
               cpu_probe_speed(ci);
               aprint_normal("%u.%02u MHz",
                             ci->ci_khz / 1000, (ci->ci_khz / 10) % 100);
               switch (vers) {
               case MPC7450: /* 7441 does not have L3! */
               case MPC7455: /* 7445 does not have L3! */
               case MPC7457: /* 7447 does not have L3! */
                       cpu_config_l3cr(vers);
                       break;
               case IBM750FX:
               case IBM750GX:
               case MPC750:
               case MPC7400:
               case MPC7410:
               case MPC7447A:
               case MPC7448:
                       cpu_config_l2cr(pvr);
                       break;
               default:
                       break;
               }
               aprint_normal("\n");
               break;
       }

#if NSYSMON_ENVSYS > 0
       /*
        * Attach MPC750 temperature sensor to the envsys subsystem.
        * XXX the 74xx series also has this sensor, but it is not
        * XXX supported by Motorola and may return values that are off by
        * XXX 35-55 degrees C.
        */
       if (vers == MPC750 || vers == IBM750FX || vers == IBM750GX)
               cpu_tau_setup(ci);
#endif

#if defined(PPC_OEA64) || defined(PPC_OEA64_BRIDGE)
       if (vers == IBM970MP)
               init_scom_speedctl();
#endif

       evcnt_attach_dynamic(&ci->ci_ev_clock, EVCNT_TYPE_INTR,
               NULL, xname, "clock");
       evcnt_attach_dynamic(&ci->ci_ev_traps, EVCNT_TYPE_TRAP,
               NULL, xname, "traps");
       evcnt_attach_dynamic(&ci->ci_ev_kdsi, EVCNT_TYPE_TRAP,
               &ci->ci_ev_traps, xname, "kernel DSI traps");
       evcnt_attach_dynamic(&ci->ci_ev_udsi, EVCNT_TYPE_TRAP,
               &ci->ci_ev_traps, xname, "user DSI traps");
       evcnt_attach_dynamic(&ci->ci_ev_udsi_fatal, EVCNT_TYPE_TRAP,
               &ci->ci_ev_udsi, xname, "user DSI failures");
       evcnt_attach_dynamic(&ci->ci_ev_kisi, EVCNT_TYPE_TRAP,
               &ci->ci_ev_traps, xname, "kernel ISI traps");
       evcnt_attach_dynamic(&ci->ci_ev_isi, EVCNT_TYPE_TRAP,
               &ci->ci_ev_traps, xname, "user ISI traps");
       evcnt_attach_dynamic(&ci->ci_ev_isi_fatal, EVCNT_TYPE_TRAP,
               &ci->ci_ev_isi, xname, "user ISI failures");
       evcnt_attach_dynamic(&ci->ci_ev_scalls, EVCNT_TYPE_TRAP,
               &ci->ci_ev_traps, xname, "system call traps");
       evcnt_attach_dynamic(&ci->ci_ev_pgm, EVCNT_TYPE_TRAP,
               &ci->ci_ev_traps, xname, "PGM traps");
       evcnt_attach_dynamic(&ci->ci_ev_fpu, EVCNT_TYPE_TRAP,
               &ci->ci_ev_traps, xname, "FPU unavailable traps");
       evcnt_attach_dynamic(&ci->ci_ev_fpusw, EVCNT_TYPE_TRAP,
               &ci->ci_ev_fpu, xname, "FPU context switches");
       evcnt_attach_dynamic(&ci->ci_ev_ali, EVCNT_TYPE_TRAP,
               &ci->ci_ev_traps, xname, "user alignment traps");
       evcnt_attach_dynamic(&ci->ci_ev_ali_fatal, EVCNT_TYPE_TRAP,
               &ci->ci_ev_ali, xname, "user alignment failures");
       evcnt_attach_dynamic(&ci->ci_ev_umchk, EVCNT_TYPE_TRAP,
               &ci->ci_ev_umchk, xname, "user MCHK failures");
       evcnt_attach_dynamic(&ci->ci_ev_vec, EVCNT_TYPE_TRAP,
               &ci->ci_ev_traps, xname, "AltiVec unavailable");
#ifdef ALTIVEC
       if (cpu_altivec) {
               evcnt_attach_dynamic(&ci->ci_ev_vecsw, EVCNT_TYPE_TRAP,
                   &ci->ci_ev_vec, xname, "AltiVec context switches");
       }
#endif
       evcnt_attach_dynamic(&ci->ci_ev_ipi, EVCNT_TYPE_INTR,
               NULL, xname, "IPIs");
}

/*
* According to a document labeled "PVR Register Settings":
** For integrated microprocessors the PVR register inside the device
** will identify the version of the microprocessor core. You must also
** read the Device ID, PCI register 02, to identify the part and the
** Revision ID, PCI register 08, to identify the revision of the
** integrated microprocessor.
* This apparently applies to 8240/8245/8241, PVR 00810101 and 80811014
*/

void
cpu_identify(char *str, size_t len)
{
       u_int pvr, major, minor;
       uint16_t vers, rev, revfmt;
       const struct cputab *cp;
       size_t n;

       pvr = mfpvr();
       vers = pvr >> 16;
       rev = pvr;

       switch (vers) {
       case MPC7410:
               minor = (pvr >> 0) & 0xff;
               major = minor <= 4 ? 1 : 2;
               break;
       case MPCG2: /*XXX see note above */
               major = (pvr >> 4) & 0xf;
               minor = (pvr >> 0) & 0xf;
               break;
       default:
               major = (pvr >>  8) & 0xf;
               minor = (pvr >>  0) & 0xf;
       }

       for (cp = models; cp->name[0] != '\0'; cp++) {
               if (cp->version == vers)
                       break;
       }

       if (cpu == -1)
               cpu = vers;

       revfmt = cp->revfmt;
       if (rev == MPC750 && pvr == 15) {
               revfmt = REVFMT_HEX;
       }
       if (vers == MPC750 && (pvr & 0xf000) == 0x7000 &&
                             (pvr & 0x0f00) >= 0x0100) {
               /* IBM Broadway */
               revfmt = REVFMT_HEX;
       }

       if (cp->name[0] != '\0') {
               n = snprintf(str, len, "%s (Revision ", cp->name);
       } else {
               n = snprintf(str, len, "Version %#x (Revision ", vers);
       }
       if (len > n) {
               switch (revfmt) {
               case REVFMT_MAJMIN:
                       snprintf(str + n, len - n, "%u.%u)", major, minor);
                       break;
               case REVFMT_HEX:
                       snprintf(str + n, len - n, "0x%04x)", rev);
                       break;
               case REVFMT_DEC:
                       snprintf(str + n, len - n, "%u)", rev);
                       break;
               }
       }
}

#ifdef L2CR_CONFIG
u_int l2cr_config = L2CR_CONFIG;
#else
u_int l2cr_config = 0;
#endif

#ifdef L3CR_CONFIG
u_int l3cr_config = L3CR_CONFIG;
#else
u_int l3cr_config = 0;
#endif

void
cpu_enable_l2cr(register_t l2cr)
{
       register_t msr, x;
       uint16_t vers;

       vers = mfpvr() >> 16;

       /* Disable interrupts and set the cache config bits. */
       msr = mfmsr();
       mtmsr(msr & ~PSL_EE);
#ifdef ALTIVEC
       if (cpu_altivec)
               __asm volatile("dssall");
#endif
       __asm volatile("sync");
       mtspr(SPR_L2CR, l2cr & ~L2CR_L2E);
       __asm volatile("sync");

       /* Wait for L2 clock to be stable (640 L2 clocks). */
       delay(100);

       /* Invalidate all L2 contents. */
       if (MPC745X_P(vers)) {
               mtspr(SPR_L2CR, l2cr | L2CR_L2I);
               do {
                       x = mfspr(SPR_L2CR);
               } while (x & L2CR_L2I);
       } else {
               mtspr(SPR_L2CR, l2cr | L2CR_L2I);
               do {
                       x = mfspr(SPR_L2CR);
               } while (x & L2CR_L2IP);
       }
       /* Enable L2 cache. */
       l2cr |= L2CR_L2E;
       mtspr(SPR_L2CR, l2cr);
       mtmsr(msr);
}

void
cpu_enable_l3cr(register_t l3cr)
{
       register_t x;

       /* By The Book (numbered steps from section 3.7.1.3 of MPC7450UM) */

       /*
        * 1: Set all L3CR bits for final config except L3E, L3I, L3PE, and
        *    L3CLKEN.  (also mask off reserved bits in case they were included
        *    in L3CR_CONFIG)
        */
       l3cr &= ~(L3CR_L3E|L3CR_L3I|L3CR_L3PE|L3CR_L3CLKEN|L3CR_RESERVED);
       mtspr(SPR_L3CR, l3cr);

       /* 2: Set L3CR[5] (otherwise reserved bit) to 1 */
       l3cr |= 0x04000000;
       mtspr(SPR_L3CR, l3cr);

       /* 3: Set L3CLKEN to 1*/
       l3cr |= L3CR_L3CLKEN;
       mtspr(SPR_L3CR, l3cr);

       /* 4/5: Perform a global cache invalidate (ref section 3.7.3.6) */
       __asm volatile("dssall;sync");
       /* L3 cache is already disabled, no need to clear L3E */
       mtspr(SPR_L3CR, l3cr|L3CR_L3I);
       do {
               x = mfspr(SPR_L3CR);
       } while (x & L3CR_L3I);

       /* 6: Clear L3CLKEN to 0 */
       l3cr &= ~L3CR_L3CLKEN;
       mtspr(SPR_L3CR, l3cr);

       /* 7: Perform a 'sync' and wait at least 100 CPU cycles */
       __asm volatile("sync");
       delay(100);

       /* 8: Set L3E and L3CLKEN */
       l3cr |= (L3CR_L3E|L3CR_L3CLKEN);
       mtspr(SPR_L3CR, l3cr);

       /* 9: Perform a 'sync' and wait at least 100 CPU cycles */
       __asm volatile("sync");
       delay(100);
}

void
cpu_config_l2cr(int pvr)
{
       register_t l2cr;
       u_int vers = (pvr >> 16) & 0xffff;

       l2cr = mfspr(SPR_L2CR);

       /*
        * For MP systems, the firmware may only configure the L2 cache
        * on the first CPU.  In this case, assume that the other CPUs
        * should use the same value for L2CR.
        */
       if ((l2cr & L2CR_L2E) != 0 && l2cr_config == 0) {
               l2cr_config = l2cr;
       }

       /*
        * Configure L2 cache if not enabled.
        */
       if ((l2cr & L2CR_L2E) == 0 && l2cr_config != 0) {
               cpu_enable_l2cr(l2cr_config);
               l2cr = mfspr(SPR_L2CR);
       }

       if ((l2cr & L2CR_L2E) == 0) {
               aprint_normal(" L2 cache present but not enabled ");
               return;
       }
       aprint_normal(",");

       switch (vers) {
       case IBM750FX:
       case IBM750GX:
               cpu_fmttab_print(cpu_ibm750_l2cr_formats, l2cr);
               break;
       case MPC750:
               if ((pvr & 0xffffff00) == 0x00082200 /* IBM750CX */ ||
                   (pvr & 0xffffef00) == 0x00082300 /* IBM750CXe */) {
                       cpu_fmttab_print(cpu_ibm750_l2cr_formats, l2cr);
               } else if ((pvr & 0xfffff0e0) == 0x00087000 /* IBM750CL */) {
                       cpu_fmttab_print(cpu_ibm750cl_l2cr_formats, l2cr);
               } else {
                       cpu_fmttab_print(cpu_l2cr_formats, l2cr);
               }
               break;
       case MPC7447A:
       case MPC7457:
               cpu_fmttab_print(cpu_7457_l2cr_formats, l2cr);
               return;
       case MPC7448:
               cpu_fmttab_print(cpu_7448_l2cr_formats, l2cr);
               return;
       case MPC7450:
       case MPC7455:
               cpu_fmttab_print(cpu_7450_l2cr_formats, l2cr);
               break;
       default:
               cpu_fmttab_print(cpu_l2cr_formats, l2cr);
               break;
       }
}

void
cpu_config_l3cr(int vers)
{
       register_t l2cr;
       register_t l3cr;

       l2cr = mfspr(SPR_L2CR);

       /*
        * For MP systems, the firmware may only configure the L2 cache
        * on the first CPU.  In this case, assume that the other CPUs
        * should use the same value for L2CR.
        */
       if ((l2cr & L2CR_L2E) != 0 && l2cr_config == 0) {
               l2cr_config = l2cr;
       }

       /*
        * Configure L2 cache if not enabled.
        */
       if ((l2cr & L2CR_L2E) == 0 && l2cr_config != 0) {
               cpu_enable_l2cr(l2cr_config);
               l2cr = mfspr(SPR_L2CR);
       }

       aprint_normal(",");
       switch (vers) {
       case MPC7447A:
       case MPC7457:
               cpu_fmttab_print(cpu_7457_l2cr_formats, l2cr);
               return;
       case MPC7448:
               cpu_fmttab_print(cpu_7448_l2cr_formats, l2cr);
               return;
       default:
               cpu_fmttab_print(cpu_7450_l2cr_formats, l2cr);
               break;
       }

       l3cr = mfspr(SPR_L3CR);

       /*
        * For MP systems, the firmware may only configure the L3 cache
        * on the first CPU.  In this case, assume that the other CPUs
        * should use the same value for L3CR.
        */
       if ((l3cr & L3CR_L3E) != 0 && l3cr_config == 0) {
               l3cr_config = l3cr;
       }

       /*
        * Configure L3 cache if not enabled.
        */
       if ((l3cr & L3CR_L3E) == 0 && l3cr_config != 0) {
               cpu_enable_l3cr(l3cr_config);
               l3cr = mfspr(SPR_L3CR);
       }

       if (l3cr & L3CR_L3E) {
               aprint_normal(",");
               cpu_fmttab_print(cpu_7450_l3cr_formats, l3cr);
       }
}

void
cpu_probe_speed(struct cpu_info *ci)
{
       uint64_t cps;

       mtspr(SPR_MMCR0, MMCR0_FC);
       mtspr(SPR_PMC1, 0);
       mtspr(SPR_MMCR0, MMCR0_PMC1SEL(PMCN_CYCLES));
       delay(100000);
       cps = (mfspr(SPR_PMC1) * 10) + 4999;

       mtspr(SPR_MMCR0, MMCR0_FC);

       ci->ci_khz = (cps * cpu_get_dfs()) / 1000;
}

/*
* Read the Dynamic Frequency Switching state and return a divisor for
* the maximum frequency.
*/
int
cpu_get_dfs(void)
{
       u_int pvr, vers;

       pvr = mfpvr();
       vers = pvr >> 16;

       switch (vers) {
       case MPC7448:
               if (mfspr(SPR_HID1) & HID1_DFS4)
                       return 4;
               /* FALLTHROUGH */
       case MPC7447A:
               if (mfspr(SPR_HID1) & HID1_DFS2)
                       return 2;
       }
       return 1;
}

/*
* Set the Dynamic Frequency Switching divisor the same for all cpus.
*/
void
cpu_set_dfs(int div)
{
       u_int dfs_mask, pvr, vers;

       pvr = mfpvr();
       vers = pvr >> 16;
       dfs_mask = 0;

       switch (vers) {
       case MPC7448:
               dfs_mask |= HID1_DFS4;
               /* FALLTHROUGH */
       case MPC7447A:
               dfs_mask |= HID1_DFS2;
               break;
       default:
               printf("cpu_set_dfs: DFS not supported\n");
               return;

       }
#ifdef MULTIPROCESSOR
       uint64_t where;
       where = xc_broadcast(0, (xcfunc_t)cpu_set_dfs_xcall, &div, &dfs_mask);
       xc_wait(where);
#else
       cpu_set_dfs_xcall(&div, &dfs_mask);
#endif
}

static void
cpu_set_dfs_xcall(void *arg1, void *arg2)
{
       u_int dfs_mask, hid1, old_hid1;
       int *divisor, s;

       divisor = arg1;
       dfs_mask = *(u_int *)arg2;

       s = splhigh();
       hid1 = old_hid1 = mfspr(SPR_HID1);

       switch (*divisor) {
       case 1:
               hid1 &= ~dfs_mask;
               break;
       case 2:
               hid1 &= ~(dfs_mask & HID1_DFS4);
               hid1 |= dfs_mask & HID1_DFS2;
               break;
       case 4:
               hid1 &= ~(dfs_mask & HID1_DFS2);
               hid1 |= dfs_mask & HID1_DFS4;
               break;
       }

       if (hid1 != old_hid1) {
               __asm volatile("sync");
               mtspr(SPR_HID1, hid1);
               __asm volatile("sync;isync");
       }

       splx(s);
}

#if NSYSMON_ENVSYS > 0
void
cpu_tau_setup(struct cpu_info *ci)
{
       struct sysmon_envsys *sme;
       int error, therm_delay;
       u_int pvr, vers;

       pvr = mfpvr();
       vers = pvr >> 16;

       if (vers == MPC750 && (pvr & 0xf000) == 0x7000 &&
                             (pvr & 0x0f00) >= 0x0100) {
               /* Broadway has dummy TAU registers, just ignore it. */
               return;
       }

       mtspr(SPR_THRM1, SPR_THRM_VALID);
       mtspr(SPR_THRM2, 0);

       /*
        * we need to figure out how much 20+us in units of CPU clock cycles
        * are
        */

       therm_delay = ci->ci_khz / 40;          /* 25us just to be safe */

       mtspr(SPR_THRM3, SPR_THRM_TIMER(therm_delay) | SPR_THRM_ENABLE);

       sme = sysmon_envsys_create();

       sensor.units = ENVSYS_STEMP;
       sensor.state = ENVSYS_SINVALID;
       (void)strlcpy(sensor.desc, "CPU Temp", sizeof(sensor.desc));
       if (sysmon_envsys_sensor_attach(sme, &sensor)) {
               sysmon_envsys_destroy(sme);
               return;
       }

       sme->sme_name = device_xname(ci->ci_dev);
       sme->sme_cookie = ci;
       sme->sme_refresh = cpu_tau_refresh;

       if ((error = sysmon_envsys_register(sme)) != 0) {
               aprint_error_dev(ci->ci_dev,
                   " unable to register with sysmon (%d)\n", error);
               sysmon_envsys_destroy(sme);
       }
}

/* Find the temperature of the CPU. */
void
cpu_tau_refresh(struct sysmon_envsys *sme, envsys_data_t *edata)
{
       int i, threshold, count;

       threshold = 64; /* Half of the 7-bit sensor range */

       /* Successive-approximation code adapted from Motorola
        * application note AN1800/D, "Programming the Thermal Assist
        * Unit in the MPC750 Microprocessor".
        */
       for (i = 5; i >= 0 ; i--) {
               mtspr(SPR_THRM1,
                   SPR_THRM_THRESHOLD(threshold) | SPR_THRM_VALID);
               count = 0;
               while ((count < 100000) &&
                   ((mfspr(SPR_THRM1) & SPR_THRM_TIV) == 0)) {
                       count++;
                       delay(1);
               }
               if (mfspr(SPR_THRM1) & SPR_THRM_TIN) {
                       /* The interrupt bit was set, meaning the
                        * temperature was above the threshold
                        */
                       threshold += 1 << i;
               } else {
                       /* Temperature was below the threshold */
                       threshold -= 1 << i;
               }
       }
       threshold += 2;

       /* Convert the temperature in degrees C to microkelvin */
       edata->value_cur = (threshold * 1000000) + 273150000;
       edata->state = ENVSYS_SVALID;
}
#endif /* NSYSMON_ENVSYS > 0 */

#ifdef MULTIPROCESSOR
volatile u_int cpu_spinstart_ack, cpu_spinstart_cpunum;

int
cpu_spinup(device_t self, struct cpu_info *ci)
{
       volatile struct cpu_hatch_data hatch_data, *h = &hatch_data;
       struct pglist mlist;
       int i, error;
       char *hp;

       KASSERT(ci != curcpu());

       /* Now allocate a hatch stack */
       error = uvm_pglistalloc(HATCH_STACK_SIZE, 0x10000, 0x10000000, 16, 0,
           &mlist, 1, 1);
       if (error) {
               aprint_error(": unable to allocate hatch stack\n");
               return -1;
       }

       hp = (void *)VM_PAGE_TO_PHYS(TAILQ_FIRST(&mlist));
       memset(hp, 0, HATCH_STACK_SIZE);

       /* Initialize secondary cpu's initial lwp to its idlelwp. */
       ci->ci_curlwp = ci->ci_data.cpu_idlelwp;
       ci->ci_curpcb = lwp_getpcb(ci->ci_curlwp);
       ci->ci_curpm = ci->ci_curpcb->pcb_pm;
       ci->ci_battable = battable;

       cpu_hatch_data = h;
       h->hatch_running = 0;
       h->hatch_self = self;
       h->hatch_ci = ci;
       h->hatch_pir = ci->ci_cpuid;

       cpu_hatch_stack = (uint32_t)hp + HATCH_STACK_SIZE - CALLFRAMELEN;
       ci->ci_lasttb = cpu_info[0].ci_lasttb;

       /* copy special registers */

       h->hatch_hid0 = mfspr(SPR_HID0);
#if defined(PPC_OEA64_BRIDGE) || defined (_ARCH_PPC64)
       h->hatch_hid1 = mfspr(SPR_HID1);
       h->hatch_hid4 = mfspr(SPR_HID4);
       h->hatch_hid5 = mfspr(SPR_HID5);
#endif

       __asm volatile ("mfsdr1 %0" : "=r"(h->hatch_sdr1));
       for (i = 0; i < 16; i++) {
               __asm ("mfsrin %0,%1" : "=r"(h->hatch_sr[i]) :
                      "r"(i << ADDR_SR_SHFT));
       }
       if (oeacpufeat & OEACPU_64)
               h->hatch_asr = mfspr(SPR_ASR);
       else
               h->hatch_asr = 0;

       if ((oeacpufeat & OEACPU_NOBAT) == 0) {
               /* copy the bat regs */
               __asm volatile ("mfibatu %0,0" : "=r"(h->hatch_ibatu[0]));
               __asm volatile ("mfibatl %0,0" : "=r"(h->hatch_ibatl[0]));
               __asm volatile ("mfibatu %0,1" : "=r"(h->hatch_ibatu[1]));
               __asm volatile ("mfibatl %0,1" : "=r"(h->hatch_ibatl[1]));
               __asm volatile ("mfibatu %0,2" : "=r"(h->hatch_ibatu[2]));
               __asm volatile ("mfibatl %0,2" : "=r"(h->hatch_ibatl[2]));
               __asm volatile ("mfibatu %0,3" : "=r"(h->hatch_ibatu[3]));
               __asm volatile ("mfibatl %0,3" : "=r"(h->hatch_ibatl[3]));
               __asm volatile ("mfdbatu %0,0" : "=r"(h->hatch_dbatu[0]));
               __asm volatile ("mfdbatl %0,0" : "=r"(h->hatch_dbatl[0]));
               __asm volatile ("mfdbatu %0,1" : "=r"(h->hatch_dbatu[1]));
               __asm volatile ("mfdbatl %0,1" : "=r"(h->hatch_dbatl[1]));
               __asm volatile ("mfdbatu %0,2" : "=r"(h->hatch_dbatu[2]));
               __asm volatile ("mfdbatl %0,2" : "=r"(h->hatch_dbatl[2]));
               __asm volatile ("mfdbatu %0,3" : "=r"(h->hatch_dbatu[3]));
               __asm volatile ("mfdbatl %0,3" : "=r"(h->hatch_dbatl[3]));
               __asm volatile ("sync; isync");
       }

       if (md_setup_trampoline(h, ci) == -1)
               return -1;
       md_presync_timebase(h);
       md_start_timebase(h);

       /* wait for secondary printf */

       delay(200000);

#ifdef CACHE_PROTO_MEI
       __asm volatile ("dcbi 0,%0"::"r"(&h->hatch_running):"memory");
       __asm volatile ("sync; isync");
       __asm volatile ("dcbst 0,%0"::"r"(&h->hatch_running):"memory");
       __asm volatile ("sync; isync");
#endif
       int hatch_bail = 0;
       while ((h->hatch_running < 1) && (hatch_bail < 100000)) {
               delay(1);
               hatch_bail++;
#ifdef CACHE_PROTO_MEI
               __asm volatile ("dcbi 0,%0"::"r"(&h->hatch_running):"memory");
               __asm volatile ("sync; isync");
               __asm volatile ("dcbst 0,%0"::"r"(&h->hatch_running):"memory");
               __asm volatile ("sync; isync");
#endif
       }
       if (h->hatch_running < 1) {
#ifdef CACHE_PROTO_MEI
               __asm volatile ("dcbi 0,%0"::"r"(&cpu_spinstart_ack):"memory");
               __asm volatile ("sync; isync");
               __asm volatile ("dcbst 0,%0"::"r"(&cpu_spinstart_ack):"memory");
               __asm volatile ("sync; isync");
#endif
               aprint_error("%d:CPU %d didn't start %d\n", cpu_spinstart_ack,
                   ci->ci_cpuid, cpu_spinstart_ack);
               console_debugger();
               return -1;
       }

       return 0;
}

static volatile int start_secondary_cpu;

register_t
cpu_hatch(void)
{
       volatile struct cpu_hatch_data *h = cpu_hatch_data;
       struct cpu_info * const ci = h->hatch_ci;
       struct pcb *pcb;
       u_int msr;
       int i;

       /* Initialize timebase. */
       __asm ("mttbl %0; mttbu %0; mttbl %0" :: "r"(0));

       /*
        * Set PIR (Processor Identification Register).  i.e. whoami
        * Note that PIR is read-only on some CPU versions, so we write to it
        * only if it has a different value than we need.
        */

       msr = mfspr(SPR_PIR);
       if (msr != h->hatch_pir)
               mtspr(SPR_PIR, h->hatch_pir);

       __asm volatile ("mtsprg0 %0" :: "r"(ci));
       curlwp = ci->ci_curlwp;
       cpu_spinstart_ack = 0;

       if ((oeacpufeat & OEACPU_NOBAT) == 0) {
               /* Initialize MMU. */
               __asm ("mtibatu 0,%0" :: "r"(h->hatch_ibatu[0]));
               __asm ("mtibatl 0,%0" :: "r"(h->hatch_ibatl[0]));
               __asm ("mtibatu 1,%0" :: "r"(h->hatch_ibatu[1]));
               __asm ("mtibatl 1,%0" :: "r"(h->hatch_ibatl[1]));
               __asm ("mtibatu 2,%0" :: "r"(h->hatch_ibatu[2]));
               __asm ("mtibatl 2,%0" :: "r"(h->hatch_ibatl[2]));
               __asm ("mtibatu 3,%0" :: "r"(h->hatch_ibatu[3]));
               __asm ("mtibatl 3,%0" :: "r"(h->hatch_ibatl[3]));
               __asm ("mtdbatu 0,%0" :: "r"(h->hatch_dbatu[0]));
               __asm ("mtdbatl 0,%0" :: "r"(h->hatch_dbatl[0]));
               __asm ("mtdbatu 1,%0" :: "r"(h->hatch_dbatu[1]));
               __asm ("mtdbatl 1,%0" :: "r"(h->hatch_dbatl[1]));
               __asm ("mtdbatu 2,%0" :: "r"(h->hatch_dbatu[2]));
               __asm ("mtdbatl 2,%0" :: "r"(h->hatch_dbatl[2]));
               __asm ("mtdbatu 3,%0" :: "r"(h->hatch_dbatu[3]));
               __asm ("mtdbatl 3,%0" :: "r"(h->hatch_dbatl[3]));
       }

#ifdef PPC_OEA64_BRIDGE
       if ((oeacpufeat & OEACPU_64_BRIDGE) != 0) {

               mtspr64(SPR_HID0, h->hatch_hid0);
               mtspr64(SPR_HID1, h->hatch_hid1);
               mtspr64(SPR_HID4, h->hatch_hid4);
               mtspr64(SPR_HID5, h->hatch_hid5);
               mtspr64(SPR_HIOR, 0);
       } else
#endif
               mtspr(SPR_HID0, h->hatch_hid0);

       if ((oeacpufeat & OEACPU_NOBAT) == 0) {
               __asm ("mtibatl 0,%0; mtibatu 0,%1; mtdbatl 0,%0; mtdbatu 0,%1;"
                   :: "r"(battable[0].batl), "r"(battable[0].batu));
       }

       __asm volatile ("sync");
       for (i = 0; i < 16; i++)
               __asm ("mtsrin %0,%1" :: "r"(h->hatch_sr[i]), "r"(i << ADDR_SR_SHFT));
       __asm volatile ("sync; isync");

       if (oeacpufeat & OEACPU_64)
               mtspr(SPR_ASR, h->hatch_asr);

       cpu_spinstart_ack = 1;
       __asm ("ptesync");
       __asm ("mtsdr1 %0" :: "r"(h->hatch_sdr1));
       __asm volatile ("sync; isync");

       cpu_spinstart_ack = 5;
       for (i = 0; i < 16; i++)
               __asm ("mfsrin %0,%1" : "=r"(h->hatch_sr[i]) :
                      "r"(i << ADDR_SR_SHFT));

       /* Enable I/D address translations. */
       msr = mfmsr();
       msr |= PSL_IR|PSL_DR|PSL_ME|PSL_RI;
       mtmsr(msr);
       __asm volatile ("sync; isync");
       cpu_spinstart_ack = 2;

       md_sync_timebase(h);

       cpu_setup(h->hatch_self, ci);

       h->hatch_running = 1;
       __asm volatile ("sync; isync");

       while (start_secondary_cpu == 0)
               ;

       __asm volatile ("sync; isync");

       aprint_normal("cpu%d started\n", curcpu()->ci_index);
       __asm volatile ("mtdec %0" :: "r"(ticks_per_intr));

       md_setup_interrupts();

       ci->ci_ipending = 0;
       ci->ci_cpl = 0;

       mtmsr(mfmsr() | PSL_EE);
       pcb = lwp_getpcb(ci->ci_data.cpu_idlelwp);
       return pcb->pcb_sp;
}

void
cpu_boot_secondary_processors(void)
{
       start_secondary_cpu = 1;
       __asm volatile ("sync");
}

#endif /*MULTIPROCESSOR*/