/*      $NetBSD$ */

/*-
* Copyright (c) 2011 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Jukka Ruohonen.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_cpufreq.c,v 1.15 2011/09/02 22:25:08 Exp $");

#include <sys/param.h>
#include <sys/cpu.h>
#include <sys/cpufreq.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/time.h>
#include <sys/xcall.h>

/* Forward declarations of the file-local helpers defined further below. */
static int                cpufreq_latency(void);
static uint16_t           cpufreq_get_max(void);
static uint16_t           cpufreq_get_min(void);

/*
 * The single registered frequency scaling backend, or NULL when no
 * backend is registered.  The accessors in this file take cpu_lock
 * before dereferencing it.
 */
static struct cpufreq_if *cpufreq_if __read_mostly = NULL;

int
cpufreq_register(struct cpufreq_if *cif)
{
       size_t count, i, j, k;
       uint16_t m;
       int rv;

       KASSERT(cif != NULL);
       KASSERT(cif->get_freq != NULL);
       KASSERT(cif->set_freq != NULL);
       KASSERT(cif->state_count > 0);
       KASSERT(cif->state_count < CPUFREQ_STATE_MAX);

       mutex_enter(&cpu_lock);

       if (cpufreq_if != NULL) {
               mutex_exit(&cpu_lock);
               return EALREADY;
       }

       mutex_exit(&cpu_lock);
       cpufreq_if = kmem_zalloc(sizeof(*cif), KM_SLEEP);

       if (cpufreq_if == NULL)
               return ENOMEM;

       mutex_enter(&cpu_lock);

       cpufreq_if->cookie = cif->cookie;
       cpufreq_if->get_freq = cif->get_freq;
       cpufreq_if->set_freq = cif->set_freq;

       (void)strlcpy(cpufreq_if->name, cif->name, sizeof(cif->name));

       /*
        * Sanity check the values and verify descending order.
        */
       for (count = i = 0; i < cif->state_count; i++) {

               if (cif->state[i].freq == 0 || cif->state[i].freq > 9999)
                       continue;

               for (j = k = 0; j < i; j++) {

                       if (cif->state[i].freq >= cif->state[j].freq) {
                               k = 1;
                               break;
                       }
               }

               if (k != 0)
                       continue;

               count++;
               cpufreq_if->state[i].freq = cif->state[i].freq;
               cpufreq_if->state[i].power = cif->state[i].power;
       }

       cpufreq_if->state_count = count;

       if (cpufreq_if->state_count == 0) {
               mutex_exit(&cpu_lock);
               cpufreq_deregister();
               return ENODEV;
       }

       rv = cpufreq_latency();

       if (rv != 0) {
               mutex_exit(&cpu_lock);
               cpufreq_deregister();
               return rv;
       }

       m = cpufreq_get_max();
       mutex_exit(&cpu_lock);
       cpufreq_set_all(m);

       return 0;
}

void
cpufreq_deregister(void)
{

       mutex_enter(&cpu_lock);

       if (cpufreq_if == NULL) {
               mutex_exit(&cpu_lock);
               return;
       }

       mutex_exit(&cpu_lock);
       kmem_free(cpufreq_if, sizeof(*cpufreq_if));
       cpufreq_if = NULL;
}

static int
cpufreq_latency(void)
{
       struct timespec nta, ntb;
       const size_t n = 20;
       size_t i, j;
       uint64_t s;

       /*
        * For each state, sample the average transition
        * latency required to set the state for all CPUs.
        * Few rounds are required to even the possible
        * caching done in the backend.
        */
       for (i = 0; i < cpufreq_if->state_count; i++) {

               for (s = 0, j = 0; j < n; j++) {

                       nta.tv_sec = nta.tv_nsec = 0;
                       ntb.tv_sec = ntb.tv_nsec = 0;

                       nanotime(&nta);
                       mutex_exit(&cpu_lock);
                       cpufreq_set_all(cpufreq_if->state[i].freq);
                       mutex_enter(&cpu_lock);
                       nanotime(&ntb);
                       timespecsub(&ntb, &nta, &ntb);

                       if (ntb.tv_sec != 0 ||
                           ntb.tv_nsec > CPUFREQ_LATENCY_MAX)
                               continue;

                       if (s >= UINT64_MAX - CPUFREQ_LATENCY_MAX)
                               break;

                       s += ntb.tv_nsec;
               }

               /*
                * Consider the backend unsuitable if
                * the transition latency was too high.
                */
               if (s == 0)
                       return EMSGSIZE;

               cpufreq_if->state[i].latency = s / n;
       }

       return 0;
}

/*
 * cpufreq_suspend:
 *
 *      Record the current frequency of `ci' in the backend's
 *      `saved_freq' and switch the CPU to the lowest frequency.
 *      Does nothing if no backend is registered.
 *
 *      cpufreq_get() and cpufreq_set() take cpu_lock themselves, so
 *      the lock is not held across those calls.  The frequency is
 *      read into a local variable and stored into the backend only
 *      while the lock is held, so the backend is never written
 *      through a pointer obtained before the lock was released.
 */
void
cpufreq_suspend(struct cpu_info *ci)
{
       uint16_t cur, l;

       mutex_enter(&cpu_lock);

       if (__predict_false(cpufreq_if == NULL)) {
               mutex_exit(&cpu_lock);
               return;
       }

       cpufreq_if->saved_freq = 0;
       l = cpufreq_get_min();
       mutex_exit(&cpu_lock);

       /* Stays zero if cpufreq_get() returns without a backend. */
       cur = 0;
       cpufreq_get(ci, &cur);

       mutex_enter(&cpu_lock);

       if (cpufreq_if != NULL)
               cpufreq_if->saved_freq = cur;

       mutex_exit(&cpu_lock);

       cpufreq_set(ci, l);
}

void
cpufreq_resume(struct cpu_info *ci)
{
       struct cpufreq_if *cif;

       mutex_enter(&cpu_lock);
       cif = cpufreq_if;

       if (__predict_false(cif == NULL)) {
               mutex_exit(&cpu_lock);
               return;
       }

       if (__predict_false(cif->saved_freq == 0)) {
               mutex_exit(&cpu_lock);
               return;
       }

       mutex_exit(&cpu_lock);
       cpufreq_set(ci, cif->saved_freq);
}

/*
 * cpufreq_get:
 *
 *      Run the backend's get_freq handler on `ci' via a unicast
 *      cross-call, passing the backend cookie and `freq' as the
 *      arguments, and wait for the call to complete.  If no backend
 *      is registered, nothing is called and `*freq' is not touched
 *      by this function.
 */
void
cpufreq_get(struct cpu_info *ci, uint16_t *freq)
{
       struct cpufreq_if *backend;
       uint64_t ticket;

       mutex_enter(&cpu_lock);
       backend = cpufreq_if;

       if (__predict_false(backend == NULL))
               goto out;

       ticket = xc_unicast(0, (*backend->get_freq), backend->cookie,
           freq, ci);
       xc_wait(ticket);
out:
       mutex_exit(&cpu_lock);
}

/*
 * cpufreq_get_max:
 *
 *      Return the frequency stored in the first slot of the backend's
 *      state table.  The caller must hold cpu_lock and a backend must
 *      be registered.
 */
static uint16_t
cpufreq_get_max(void)
{

       KASSERT(cpufreq_if != NULL);
       KASSERT(mutex_owned(&cpu_lock) != 0);

       return cpufreq_if->state[0].freq;
}

/*
 * cpufreq_get_min:
 *
 *      Return the frequency stored in the last counted slot of the
 *      backend's state table (index `state_count - 1').  The caller
 *      must hold cpu_lock and a backend must be registered.
 */
static uint16_t
cpufreq_get_min(void)
{
       int last;

       KASSERT(cpufreq_if != NULL);
       KASSERT(mutex_owned(&cpu_lock) != 0);

       last = cpufreq_if->state_count - 1;

       return cpufreq_if->state[last].freq;
}

int
cpufreq_get_if(struct cpufreq_if *cif)
{

       mutex_enter(&cpu_lock);

       if (__predict_false(cpufreq_if == NULL)) {
               mutex_exit(&cpu_lock);
               return ENODEV;
       }

       memcpy(cif, cpufreq_if, sizeof(*cif));
       mutex_exit(&cpu_lock);

       return 0;
}

/*
 * cpufreq_set:
 *
 *      Run the backend's set_freq handler on `ci' via a unicast
 *      cross-call, passing the backend cookie and a pointer to `freq',
 *      and wait for the call to complete.  Does nothing if no backend
 *      is registered.
 */
void
cpufreq_set(struct cpu_info *ci, uint16_t freq)
{
       struct cpufreq_if *backend;
       uint64_t ticket;

       mutex_enter(&cpu_lock);
       backend = cpufreq_if;

       if (__predict_false(backend == NULL))
               goto out;

       /*
        * The handler receives the address of the local `freq';
        * xc_wait() is called before this function returns.
        */
       ticket = xc_unicast(0, (*backend->set_freq), backend->cookie,
           &freq, ci);
       xc_wait(ticket);
out:
       mutex_exit(&cpu_lock);
}

/*
 * cpufreq_set_all:
 *
 *      Run the backend's set_freq handler through a broadcast
 *      cross-call, passing the backend cookie and a pointer to `freq',
 *      and wait for the call to complete.  Does nothing if no backend
 *      is registered.
 */
void
cpufreq_set_all(uint16_t freq)
{
       struct cpufreq_if *backend;
       uint64_t ticket;

       mutex_enter(&cpu_lock);
       backend = cpufreq_if;

       if (__predict_false(backend == NULL))
               goto out;

       /*
        * The handler receives the address of the local `freq';
        * xc_wait() is called before this function returns.
        */
       ticket = xc_broadcast(0, (*backend->set_freq), backend->cookie,
           &freq);
       xc_wait(ticket);
out:
       mutex_exit(&cpu_lock);
}

/*-
* Copyright (c) 2011 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Jukka Ruohonen.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _SYS_CPUFREQ_H_
#define _SYS_CPUFREQ_H_

#ifndef _KERNEL
#include <stdbool.h>
#endif

#ifdef _KERNEL
#ifndef _SYS_XCALL_H_
#include <sys/xcall.h>
#endif
#endif

/*
 * Limits of the cpufreq interface.  CPUFREQ_NAME_MAX is the size of
 * the `name' buffer in struct cpufreq_if.  CPUFREQ_LATENCY_MAX is the
 * largest latency sample, in nanoseconds, that cpufreq_latency()
 * accepts; larger samples are discarded.
 */
#define CPUFREQ_NAME_MAX        16
#define CPUFREQ_STATE_MAX       255             /* Maximum number of states */
#define CPUFREQ_LATENCY_MAX     UINT16_MAX      /* Maximum per-CPU latency  */

/*
 * A single frequency state of a backend.  The frequency and power are
 * supplied by the backend at registration; the latency is filled in
 * by cpufreq_latency() as the measured average time needed to set the
 * state on all CPUs.
 */
struct cpufreq_state {
       uint16_t                 freq;          /* MHz  */
       uint16_t                 power;         /* mW   */
       uint32_t                 latency;       /* nsec */
};

/*
 * Description of a frequency scaling backend.  The fields before the
 * _KERNEL section are visible to userland as well, since the structure
 * is embedded in cpustate_t (see the `cs_cpufreq' member).
 */
struct cpufreq_if {
       /* Name of the backend. */
       char                     name[CPUFREQ_NAME_MAX];
       /* Table of states; `state_count' holds the number of states. */
       struct cpufreq_state     state[CPUFREQ_STATE_MAX];
       uint16_t                 state_count;
       /*
        * Frequency requested via IOC_CPU_SETSTATE; zero means that
        * no frequency change is requested.
        */
       uint16_t                 state_target;
       /*
        * In the IOC_CPU_SETSTATE path: if true, the frequency is set
        * for the given CPU only, otherwise for all CPUs.
        */
       bool                     mp;

#ifdef _KERNEL
       /* Opaque argument passed to the get_freq and set_freq handlers. */
       void                    *cookie;
       /* Cross-call handlers; second argument is a uint16_t pointer. */
       xcfunc_t                 get_freq;
       xcfunc_t                 set_freq;
       /* Set by cpufreq_suspend(), used by cpufreq_resume(). */
       uint16_t                 saved_freq;
#endif  /* _KERNEL */
};

#ifdef _KERNEL
/*
 * Kernel interface.  cpufreq_register() and cpufreq_get_if() return
 * zero on success and an errno value on failure.  The remaining
 * functions return nothing and do nothing when no backend is
 * registered.
 */
int     cpufreq_register(struct cpufreq_if *);
void    cpufreq_deregister(void);
void    cpufreq_suspend(struct cpu_info *);
void    cpufreq_resume(struct cpu_info *);
void    cpufreq_get(struct cpu_info *, uint16_t *);
int     cpufreq_get_if(struct cpufreq_if *);
void    cpufreq_set(struct cpu_info *, uint16_t);
void    cpufreq_set_all(uint16_t);
#endif  /* _KERNEL */

#endif /* _SYS_CPUFREQ_H_ */

Index: kern_cpu.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_cpu.c,v
retrieving revision 1.51
diff -u -p -r1.51 kern_cpu.c
--- kern_cpu.c  11 Sep 2011 14:54:49 -0000      1.51
+++ kern_cpu.c  26 Sep 2011 15:58:20 -0000
@@ -177,8 +177,9 @@ int
cpuctl_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
{
       CPU_INFO_ITERATOR cii;
-       cpustate_t *cs;
+       struct cpufreq_if cif;
       struct cpu_info *ci;
+       cpustate_t *cs;
       int error, i;
       u_int id;

@@ -201,6 +202,19 @@ cpuctl_ioctl(dev_t dev, u_long cmd, void
               }
               error = cpu_setintr(ci, cs->cs_intr);
               error = cpu_setstate(ci, cs->cs_online);
+
+               if (cs->cs_cpufreq.state_target == 0)
+                       break;
+
+               mutex_exit(&cpu_lock);
+
+               if (cs->cs_cpufreq.mp != false)
+                       cpufreq_set(ci, cs->cs_cpufreq.state_target);
+               else {
+                       cpufreq_set_all(cs->cs_cpufreq.state_target);
+               }
+
+               mutex_enter(&cpu_lock);
               break;

       case IOC_CPU_GETSTATE:
@@ -208,6 +222,7 @@ cpuctl_ioctl(dev_t dev, u_long cmd, void
                       cs = data;
               id = cs->cs_id;
               memset(cs, 0, sizeof(*cs));
+               memset(&cif, 0, sizeof(struct cpufreq_if));
               cs->cs_id = id;
               if (cs->cs_id >= maxcpus ||
                   (ci = cpu_lookup(id)) == NULL) {
@@ -227,6 +242,26 @@ cpuctl_ioctl(dev_t dev, u_long cmd, void
                   (ci->ci_schedstate.spc_lastmod >> 32);
               cs->cs_intrcnt = cpu_intr_count(ci) + 1;
               cs->cs_hwid = ci->ci_cpuid;
+
+               mutex_exit(&cpu_lock);
+               error = cpufreq_get_if(&cif);
+               mutex_enter(&cpu_lock);
+
+               if (error != 0) {
+                       error = 0;
+                       break;
+               }
+
+               cs->cs_cpufreq.mp = cif.mp;
+               cs->cs_cpufreq.state_count = cif.state_count;
+               strlcpy(cs->cs_cpufreq.name, cif.name, sizeof(cif.name));
+
+               for (i = 0; i < cif.state_count; i++) {
+                       cs->cs_cpufreq.state[i].freq = cif.state[i].freq;
+                       cs->cs_cpufreq.state[i].power = cif.state[i].power;
+                       cs->cs_cpufreq.state[i].latency = cif.state[i].latency;
+               }
+
               break;

       case IOC_CPU_MAPID:
? cpufreq.h
Index: Makefile
===================================================================
RCS file: /cvsroot/src/sys/sys/Makefile,v
retrieving revision 1.137
diff -u -p -r1.137 Makefile
--- Makefile    7 Aug 2011 13:33:02 -0000       1.137
+++ Makefile    26 Sep 2011 15:58:36 -0000
@@ -8,7 +8,7 @@ INCS=   acct.h agpio.h aio.h ansi.h aout_m
       bitops.h bootblock.h bswap.h buf.h \
       callback.h callout.h cdefs.h cdefs_aout.h \
       cdefs_elf.h cdio.h chio.h clockctl.h condvar.h conf.h core.h \
-       cpuio.h ctype_bits.h ctype_inline.h \
+       cpufreq.h cpuio.h ctype_bits.h ctype_inline.h \
       device.h device_if.h \
       dir.h dirent.h \
       disk.h disklabel.h disklabel_acorn.h disklabel_gpt.h disklabel_rdb.h \
Index: cpuio.h
===================================================================
RCS file: /cvsroot/src/sys/sys/cpuio.h,v
retrieving revision 1.5
diff -u -p -r1.5 cpuio.h
--- cpuio.h     11 Sep 2011 14:54:49 -0000      1.5
+++ cpuio.h     26 Sep 2011 15:58:36 -0000
@@ -35,6 +35,7 @@
#include <sys/types.h>
#include <sys/time.h>
#include <sys/ioccom.h>
+#include <sys/cpufreq.h>

#ifndef _KERNEL
#include <stdbool.h>
@@ -45,16 +46,17 @@
 * are better returned via autoconf.
 */
typedef struct cpustate {
-       u_int           cs_id;          /* matching ci_cpuid */
-       bool            cs_online;      /* running unbound LWPs */
-       bool            cs_intr;        /* fielding interrupts */
-       bool            cs_unused[2];   /* reserved */
-       int32_t         cs_lastmod;     /* time of last state change */
-       char            cs_name[16];    /* reserved */
-       int32_t         cs_lastmodhi;   /* time of last state change */
-       uint32_t        cs_intrcnt;     /* count of interrupt handlers + 1 */
-       uint32_t        cs_hwid;        /* hardware id */
-       uint32_t        cs_reserved;    /* reserved */
+       u_int             cs_id;        /* matching ci_cpuid */
+       bool              cs_online;    /* running unbound LWPs */
+       bool              cs_intr;      /* fielding interrupts */
+       bool              cs_unused[2]; /* reserved */
+       int32_t           cs_lastmod;   /* time of last state change */
+       char              cs_name[16];  /* reserved */
+       int32_t           cs_lastmodhi; /* time of last state change */
+       uint32_t          cs_intrcnt;   /* count of interrupt handlers + 1 */
+       uint32_t          cs_hwid;      /* hardware id */
+       uint32_t          cs_reserved;  /* reserved */
+       struct cpufreq_if cs_cpufreq;   /* cpufreq(9) */
} cpustate_t;

#define        IOC_CPU_SETSTATE        _IOW('c', 0, cpustate_t)
Index: files
===================================================================
RCS file: /cvsroot/src/sys/conf/files,v
retrieving revision 1.1027
diff -u -p -r1.1027 files
--- files       19 Sep 2011 08:53:30 -0000      1.1027
+++ files       26 Sep 2011 15:59:31 -0000
@@ -1525,6 +1525,7 @@ file      kern/subr_autoconf.c
file   kern/subr_blist.c               vmswap
file   kern/subr_bufq.c
file   kern/subr_callback.c
+file   kern/subr_cpufreq.c
file   kern/subr_copy.c
file   kern/subr_debug.c               debug
file   kern/subr_device.c