/*      $NetBSD: altq_rmclass.c,v 1.32 2025/01/08 13:00:04 joe Exp $    */
/*      $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $      */

/*
* Copyright (c) 1991-1997 Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
*    must display the following acknowledgement:
*      This product includes software developed by the Network Research
*      Group at Lawrence Berkeley Laboratory.
* 4. Neither the name of the University nor of the Laboratory may be used
*    to endorse or promote products derived from this software without
*    specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* LBL code modified by speer@eng.sun.com, May 1997.
* For questions and/or comments, please send mail to cbq@ee.lbl.gov
*/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: altq_rmclass.c,v 1.32 2025/01/08 13:00:04 joe Exp $");

/* #ident "@(#)rm_class.c  1.48     97/12/05 SMI" */

#ifdef _KERNEL_OPT
#include "opt_altq.h"
#include "opt_inet.h"
#endif

#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/time.h>
#ifdef ALTQ3_COMPAT
#include <sys/kernel.h>
#endif
#include <sys/cprng.h>

#include <net/if.h>
#include <net/if_types.h>
#ifdef ALTQ3_COMPAT
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#endif

#include <altq/altq.h>
#include <altq/altq_rmclass.h>
#include <altq/altq_rmclass_debug.h>
#include <altq/altq_red.h>
#include <altq/altq_rio.h>

/*
* Local Macros
*/

#define reset_cutoff(ifd)       { ifd->cutoff_ = RM_MAXDEPTH; }

#define PSEC_TO_NSEC(t) ((t) / 1000)

/*
* Local routines.
*/

static int      rmc_satisfied(struct rm_class *, struct timespec *);
static void     rmc_wrr_set_weights(struct rm_ifdat *);
static void     rmc_depth_compute(struct rm_class *);
static void     rmc_depth_recompute(rm_class_t *);

static mbuf_t   *_rmc_wrr_dequeue_next(struct rm_ifdat *, int);
static mbuf_t   *_rmc_prr_dequeue_next(struct rm_ifdat *, int);

static int      _rmc_addq(rm_class_t *, mbuf_t *);
static void     _rmc_dropq(rm_class_t *);
static mbuf_t   *_rmc_getq(rm_class_t *);
static mbuf_t   *_rmc_pollq(rm_class_t *);

static int      rmc_under_limit(struct rm_class *, struct timespec *);
static void     rmc_tl_satisfied(struct rm_ifdat *, struct timespec *);
static void     rmc_drop_action(struct rm_class *);
static void     rmc_restart(struct rm_class *);
static void     rmc_root_overlimit(struct rm_class *, struct rm_class *);

#define BORROW_OFFTIME
/*
* BORROW_OFFTIME (experimental):
* borrow the offtime of the class being borrowed from.
* the reason is that when its own offtime is set, the class is unable
* to borrow much, especially when cutoff is taking effect.
* but when the borrowed class is overloaded (avgidle is close to minidle),
* use the borrowing class's offtime to avoid overload.
*/
#define ADJUST_CUTOFF
/*
* ADJUST_CUTOFF (experimental):
* if no underlimit class is found due to cutoff, increase cutoff and
* retry the scheduling loop.
* also, don't invoke delay_actions while cutoff is taking effect,
* since a sleeping class won't have a chance to be scheduled in the
* next loop.
*
* the heuristics for setting the top-level variable (cutoff_) now become:
*      1. if a packet arrives for a not-overlimit class, set cutoff
*         to the depth of the class.
*      2. if cutoff is i, and a packet arrives for an overlimit class
*         with an underlimit ancestor at a lower level than i (say j),
*         then set cutoff to j.
*      3. at scheduling a packet, if there is no underlimit class
*         due to the current cutoff level, increase cutoff by 1 and
*         then try to schedule again.
*/
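
/*
* An illustrative sketch of these heuristics (the hierarchy is an
* assumption made up for this example): with a root at depth 2, an
* interior class at depth 1 and a leaf at depth 0, a packet arriving
* for an underlimit leaf sets cutoff_ to 0 (rule 1); if the leaf is
* overlimit but its depth-1 parent is underlimit, cutoff_ becomes 1
* (rule 2); and if the scheduler then finds no underlimit class at
* cutoff_ 1, it bumps cutoff_ to 2 and rescans (rule 3).
*/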

/*
* rm_class_t *
* rmc_newclass(...) - Create a new resource management class at priority
* 'pri' on the interface given by 'ifd'.
*
* psecPerByte  is the transmission time of the interface in
*              picoseconds/byte (i.e., the inverse of the bandwidth).
*              E.g., 800000 for a 10Mb/s ethernet.  If the class gets
*              less than 100% of the bandwidth, this number should be
*              the 'effective' value for the class.  Let f be the
*              bandwidth fraction allocated to this class, and let
*              psPerByte be the transmission time of the output link in
*              picoseconds/byte.  Then psecPerByte is set to
*              psPerByte / f.  E.g., 1600000 (= 800000 / .5)
*              for a class that gets 50% of an ethernet's bandwidth.
*
* action       the routine to call when the class is over limit.
*
* maxq         max allowable queue size for class (in packets).
*
* parent       parent class pointer.
*
* borrow       class to borrow from (should be either 'parent' or null).
*
* maxidle      max value allowed for class 'idle' time estimate (this
*              parameter determines how large an initial burst of packets
*              can be before the overlimit action is invoked).
*
* offtime      how long 'delay' action will delay when class goes over
*              limit (this parameter determines the steady-state burst
*              size when a class is running over its limit).
*
* Maxidle and offtime have to be computed from the following:  If the
* average packet size is s, the bandwidth fraction allocated to this
* class is f, we want to allow b packet bursts, and the gain of the
* averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
*
*   ptime = s * nsPerByte * (1 - f) / f
*   maxidle = ptime * (1 - g^b) / g^b
*   minidle = -ptime * (1 / (f - 1))
*   offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1))
*
* Operationally, it's convenient to specify maxidle & offtime in units
* independent of the link bandwidth so the maxidle & offtime passed to
* this routine are the above values multiplied by 8*f/(1000*nsPerByte).
* (The constant factor is a scale factor needed to make the parameters
* integers.  This scaling also means that the 'unscaled' values of
* maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
* not nanoseconds.)  Also note that the 'idle' filter computation keeps
* an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
* maxidle also must be scaled upward by this value.  Thus, the passed
* values for maxidle and offtime can be computed as follows:
*
* maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
* offtime = offtime * 8 / (1000 * nsecPerByte)
*
* When USE_HRTIME is employed, then maxidle and offtime become:
*      maxidle = maxidle * (8.0 / nsecPerByte);
*      offtime = offtime * (8.0 / nsecPerByte);
*/
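/*
* A worked example of the formulas above (the numbers are illustrative
* assumptions, not from any particular configuration): for s = 1000
* bytes, nsPerByte = 800 (10Mb/s), f = 0.5, b = 16 and
* RM_FILTER_GAIN = 5 (so g = 31/32):
*
*   ptime   = 1000 * 800 * (1 - .5) / .5 = 800000 ns
*   g^b     = (31/32)^16 ~= 0.602
*   maxidle = 800000 * (1 - 0.602) / 0.602 ~= 529000 ns
*
* i.e., roughly half a millisecond of accumulated 'idle' credit is
* allowed before the overlimit action kicks in for a 16-packet burst.
*/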
struct rm_class *
rmc_newclass(int pri, struct rm_ifdat *ifd, uint64_t psecPerByte,
   void (*action)(rm_class_t *, rm_class_t *), int maxq,
   struct rm_class *parent, struct rm_class *borrow, u_int maxidle,
   int minidle, u_int offtime, int pktsize, int flags)
{
       struct rm_class *cl;
       struct rm_class *peer;
       int              s;

       if (pri >= RM_MAXPRIO)
               return NULL;
#ifndef ALTQ_RED
       if (flags & RMCF_RED) {
#ifdef ALTQ_DEBUG
               printf("rmc_newclass: RED not configured for CBQ!\n");
#endif
               return NULL;
       }
#endif
#ifndef ALTQ_RIO
       if (flags & RMCF_RIO) {
#ifdef ALTQ_DEBUG
               printf("rmc_newclass: RIO not configured for CBQ!\n");
#endif
               return NULL;
       }
#endif

       cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_WAITOK|M_ZERO);
       if (cl == NULL)
               return NULL;
       CALLOUT_INIT(&cl->callout_);

       cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
       if (cl->q_ == NULL) {
               free(cl, M_DEVBUF);
               return NULL;
       }

       /*
        * Class initialization.
        */
       cl->children_ = NULL;
       cl->parent_ = parent;
       cl->borrow_ = borrow;
       cl->leaf_ = 1;
       cl->ifdat_ = ifd;
       cl->pri_ = pri;
       cl->allotment_ = (u_int)(RM_PS_PER_SEC / psecPerByte); /* Bytes per sec */
       cl->depth_ = 0;
       cl->qthresh_ = 0;
       cl->ps_per_byte_ = psecPerByte;

       qlimit(cl->q_) = maxq;
       qtype(cl->q_) = Q_DROPHEAD;
       qlen(cl->q_) = 0;
       cl->flags_ = flags;

#if 1 /* minidle is also scaled in ALTQ */
       cl->minidle_ = ((int64_t)minidle * (int64_t)psecPerByte) / 8;
       if (cl->minidle_ > 0)
               cl->minidle_ = 0;
#else
       cl->minidle_ = minidle;
#endif
       cl->maxidle_ = ((int64_t)maxidle * (int64_t)psecPerByte) / 8;
       if (cl->maxidle_ == 0)
               cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
       cl->avgidle_ = cl->maxidle_;
       cl->offtime_ = (((int64_t)offtime * (int64_t)psecPerByte) / 8) >> RM_FILTER_GAIN;
       if (cl->offtime_ == 0)
               cl->offtime_ = 1;
#else
       cl->avgidle_ = 0;
	cl->offtime_ = (offtime * psecPerByte) / 8;
#endif
       cl->overlimit = action;

#ifdef ALTQ_RED
       if (flags & (RMCF_RED|RMCF_RIO)) {
               int red_flags, red_pkttime;

               red_flags = 0;
               if (flags & RMCF_ECN)
                       red_flags |= REDF_ECN;
               if (flags & RMCF_FLOWVALVE)
                       red_flags |= REDF_FLOWVALVE;
#ifdef ALTQ_RIO
               if (flags & RMCF_CLEARDSCP)
                       red_flags |= RIOF_CLEARDSCP;
#endif
               red_pkttime = PSEC_TO_NSEC(psecPerByte) * pktsize  / 1000;

               if (flags & RMCF_RED) {
                       cl->red_ = red_alloc(0, 0,
                           qlimit(cl->q_) * 10/100,
                           qlimit(cl->q_) * 30/100,
                           red_flags, red_pkttime);
                       if (cl->red_ != NULL)
                               qtype(cl->q_) = Q_RED;
               }
#ifdef ALTQ_RIO
               else {
                       cl->red_ = (red_t *)rio_alloc(0, NULL,
                                                     red_flags, red_pkttime);
                       if (cl->red_ != NULL)
                               qtype(cl->q_) = Q_RIO;
               }
#endif
       }
#endif /* ALTQ_RED */

       /*
        * put the class into the class tree
        */
       s = splnet();
       if ((peer = ifd->active_[pri]) != NULL) {
               /* find the last class at this pri */
               cl->peer_ = peer;
               while (peer->peer_ != ifd->active_[pri])
                       peer = peer->peer_;
               peer->peer_ = cl;
       } else {
               ifd->active_[pri] = cl;
               cl->peer_ = cl;
       }

       if (cl->parent_) {
               cl->next_ = parent->children_;
               parent->children_ = cl;
               parent->leaf_ = 0;
       }

       /*
        * Compute the depth of this class and its ancestors in the class
        * hierarchy.
        */
       rmc_depth_compute(cl);

       /*
        * If CBQ's WRR is enabled, then initialize the class WRR state.
        */
       if (ifd->wrr_) {
               ifd->num_[pri]++;
               ifd->alloc_[pri] += cl->allotment_;
               rmc_wrr_set_weights(ifd);
       }
       splx(s);
       return cl;
}
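
/*
* A call sketch (illustrative only; 'ifd', 'maxidle', 'minidle' and
* 'offtime' are assumed to have been set up per the scaling rules
* above): create a leaf class at priority 1 that gets 50% of a 10Mb/s
* link (1600000 ps/byte) and may borrow from the root, with a 30-packet
* queue and 1500-byte packets:
*
*	cl = rmc_newclass(1, ifd, 2 * 800000ULL, rmc_delay_action,
*	    30, ifd->root_, ifd->root_, maxidle, minidle, offtime,
*	    1500, 0);
*/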

int
rmc_modclass(struct rm_class *cl, uint64_t psecPerByte, int maxq, u_int maxidle,
   int minidle, u_int offtime, int pktsize)
{
       struct rm_ifdat *ifd;
       u_int            old_allotment;
       int              s;

       ifd = cl->ifdat_;
       old_allotment = cl->allotment_;

       s = splnet();
       cl->allotment_ = (u_int)(RM_PS_PER_SEC / psecPerByte); /* Bytes per sec */
       cl->qthresh_ = 0;
       cl->ps_per_byte_ = psecPerByte;

       qlimit(cl->q_) = maxq;

#if 1 /* minidle is also scaled in ALTQ */
       cl->minidle_ = ((int64_t)minidle * (int64_t)psecPerByte) / 8;
       if (cl->minidle_ > 0)
               cl->minidle_ = 0;
#else
       cl->minidle_ = minidle;
#endif
       cl->maxidle_ = ((int64_t)maxidle * (int64_t)psecPerByte) / 8;
       if (cl->maxidle_ == 0)
               cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
       cl->avgidle_ = cl->maxidle_;
       cl->offtime_ = (((int64_t)offtime * (int64_t)psecPerByte) / 8) >> RM_FILTER_GAIN;
       if (cl->offtime_ == 0)
               cl->offtime_ = 1;
#else
       cl->avgidle_ = 0;
	cl->offtime_ = (offtime * psecPerByte) / 8;
#endif

       /*
        * If CBQ's WRR is enabled, then initialize the class WRR state.
        */
       if (ifd->wrr_) {
               ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
               rmc_wrr_set_weights(ifd);
       }
       splx(s);
       return 0;
}

/*
* static void
* rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
*      the appropriate round robin weights for the CBQ weighted round
*      robin algorithm.
*
*      Returns: NONE
*/

static void
rmc_wrr_set_weights(struct rm_ifdat *ifd)
{
       int             i;
       struct rm_class *cl, *clh;

       for (i = 0; i < RM_MAXPRIO; i++) {
               /*
                * This is inverted from that of the simulator to
                * maintain precision.
                */
               if (ifd->num_[i] == 0)
                       ifd->M_[i] = 0;
               else
                       ifd->M_[i] = ifd->alloc_[i] /
                               (ifd->num_[i] * ifd->maxpkt_);
               /*
                * Compute the weighted allotment for each class.
                * This takes the expensive div instruction out
                * of the main loop for the wrr scheduling path.
                * These only get recomputed when a class comes or
                * goes.
                */
               if (ifd->active_[i] != NULL) {
                       clh = cl = ifd->active_[i];
                       do {
                               /* safe-guard for slow link or alloc_ == 0 */
                               if (ifd->M_[i] == 0)
                                       cl->w_allotment_ = 0;
                               else
                                       cl->w_allotment_ = cl->allotment_ /
                                               ifd->M_[i];
                               cl = cl->peer_;
                       } while ((cl != NULL) && (cl != clh));
               }
       }
}
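
/*
* A numeric sketch of the weight computation above (values are made up
* for illustration): two classes at one priority with allotments of
* 625000 and 1250000 bytes/sec on an interface with maxpkt_ = 1500:
*
*	M_[i]        = (625000 + 1250000) / (2 * 1500) = 625
*	w_allotment_ = 625000 / 625  = 1000 bytes
*	w_allotment_ = 1250000 / 625 = 2000 bytes
*
* so per WRR round the classes may send about 1000 and 2000 bytes
* respectively, i.e., in proportion to their allotments, with the mean
* weight equal to maxpkt_.
*/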

int
rmc_get_weight(struct rm_ifdat *ifd, int pri)
{
       if ((pri >= 0) && (pri < RM_MAXPRIO))
               return (ifd->M_[pri]);
       else
               return 0;
}

/*
* static void
* rmc_depth_compute(struct rm_class *cl) - This function computes the
*      appropriate depth of class 'cl' and its ancestors.
*
*      Returns:        NONE
*/

static void
rmc_depth_compute(struct rm_class *cl)
{
       rm_class_t      *t = cl, *p;

       /*
        * Recompute the depth for the branch of the tree.
        */
       while (t != NULL) {
               p = t->parent_;
               if (p && (t->depth_ >= p->depth_)) {
                       p->depth_ = t->depth_ + 1;
                       t = p;
               } else
                       t = NULL;
       }
}

/*
* static void
* rmc_depth_recompute(struct rm_class *cl) - This function re-computes
*      the depth of the tree after a class has been deleted.
*
*      Returns:        NONE
*/

static void
rmc_depth_recompute(rm_class_t *cl)
{
#if 1 /* ALTQ */
       rm_class_t      *p, *t;

       p = cl;
       while (p != NULL) {
               if ((t = p->children_) == NULL) {
                       p->depth_ = 0;
               } else {
                       int cdepth = 0;

                       while (t != NULL) {
                               if (t->depth_ > cdepth)
                                       cdepth = t->depth_;
                               t = t->next_;
                       }

                       if (p->depth_ == cdepth + 1)
                               /* no change to this parent */
                               return;

                       p->depth_ = cdepth + 1;
               }

               p = p->parent_;
       }
#else
       rm_class_t      *t;

       if (cl->depth_ >= 1) {
               if (cl->children_ == NULL) {
                       cl->depth_ = 0;
               } else if ((t = cl->children_) != NULL) {
                       while (t != NULL) {
                               if (t->children_ != NULL)
                                       rmc_depth_recompute(t);
                               t = t->next_;
                       }
               } else
                       rmc_depth_compute(cl);
       }
#endif
}

/*
* void
* rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
*      function deletes a class from the link-sharing structure and frees
*      all resources associated with the class.
*
*      Returns: NONE
*/

void
rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
{
       struct rm_class *p, *head, *previous;
       int              s;

       ASSERT(cl->children_ == NULL);

       if (cl->sleeping_)
               CALLOUT_STOP(&cl->callout_);

       s = splnet();
       /*
        * Free packets in the packet queue.
        * XXX - this may not be a desired behavior.  Packets should be
        *              re-queued.
        */
       rmc_dropall(cl);

       /*
	 * If the class has a parent, then remove the class from its
	 * parent's children chain.
        */
       if (cl->parent_ != NULL) {
               head = cl->parent_->children_;
               p = previous = head;
               if (head->next_ == NULL) {
                       ASSERT(head == cl);
                       cl->parent_->children_ = NULL;
                       cl->parent_->leaf_ = 1;
               } else while (p != NULL) {
                       if (p == cl) {
                               if (cl == head)
                                       cl->parent_->children_ = cl->next_;
                               else
                                       previous->next_ = cl->next_;
                               cl->next_ = NULL;
                               p = NULL;
                       } else {
                               previous = p;
                               p = p->next_;
                       }
               }
       }

       /*
        * Delete class from class priority peer list.
        */
       if ((p = ifd->active_[cl->pri_]) != NULL) {
               /*
                * If there is more than one member of this priority
                * level, then look for class(cl) in the priority level.
                */
               if (p != p->peer_) {
                       while (p->peer_ != cl)
                               p = p->peer_;
                       p->peer_ = cl->peer_;

                       if (ifd->active_[cl->pri_] == cl)
                               ifd->active_[cl->pri_] = cl->peer_;
               } else {
                       ASSERT(p == cl);
                       ifd->active_[cl->pri_] = NULL;
               }
       }

       /*
        * Recompute the WRR weights.
        */
       if (ifd->wrr_) {
               ifd->alloc_[cl->pri_] -= cl->allotment_;
               ifd->num_[cl->pri_]--;
               rmc_wrr_set_weights(ifd);
       }

       /*
        * Re-compute the depth of the tree.
        */
#if 1 /* ALTQ */
       rmc_depth_recompute(cl->parent_);
#else
       rmc_depth_recompute(ifd->root_);
#endif

       splx(s);

       /*
        * Free the class structure.
        */
       if (cl->red_ != NULL) {
#ifdef ALTQ_RIO
               if (q_is_rio(cl->q_))
                       rio_destroy((rio_t *)cl->red_);
#endif
#ifdef ALTQ_RED
               if (q_is_red(cl->q_))
                       red_destroy(cl->red_);
#endif
       }
       free(cl->q_, M_DEVBUF);
       free(cl, M_DEVBUF);
}


/*
* int
* rmc_init(...) - Initialize the resource management data structures
*      associated with the output portion of interface 'ifp'.  'ifd' is
*      where the structures will be built (for backwards compatibility, the
*      structures aren't kept in the ifnet struct).  'psecPerByte'
*      gives the link speed (inverse of bandwidth) in picoseconds/byte.
*      'restart' is the driver-specific routine that the generic 'delay
*      until under limit' action will call to restart output.  `maxq'
*      is the queue size of the 'link' & 'default' classes.  'maxqueued'
*      is the maximum number of packets that the resource management
*      code will allow to be queued 'downstream' (this is typically 1).
*
*      Returns:        0 on success
*/

int
rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, uint64_t psecPerByte,
   void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
   int minidle, u_int offtime, int flags)
{
       int i, mtu;

       /*
        * Initialize the CBQ tracing/debug facility.
        */
       CBQTRACEINIT();

       mtu = ifq->altq_ifp->if_mtu;
       if (mtu < 1) {
               printf("altq: %s: invalid MTU (interface not initialized?)\n",
                   ifq->altq_ifp->if_xname);
               return EINVAL;
       }

       (void)memset((char *)ifd, 0, sizeof (*ifd));
       ifd->ifq_ = ifq;
       ifd->restart = restart;
       ifd->maxqueued_ = maxqueued;
       ifd->ps_per_byte_ = psecPerByte;
       ifd->maxpkt_ = mtu;
       ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
       ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
#if 1
       ifd->maxiftime_ = mtu * psecPerByte / 1000 / 1000 * 16;
       if ((int64_t)mtu * psecPerByte > (int64_t)10 * 1000000000)
               ifd->maxiftime_ /= 4;
#endif

       reset_cutoff(ifd);
       CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);

       /*
        * Initialize the CBQ's WRR state.
        */
       for (i = 0; i < RM_MAXPRIO; i++) {
               ifd->alloc_[i] = 0;
               ifd->M_[i] = 0;
               ifd->num_[i] = 0;
               ifd->na_[i] = 0;
               ifd->active_[i] = NULL;
       }

       /*
        * Initialize current packet state.
        */
       ifd->qi_ = 0;
       ifd->qo_ = 0;
       for (i = 0; i < RM_MAXQUEUED; i++) {
               ifd->class_[i] = NULL;
               ifd->curlen_[i] = 0;
               ifd->borrowed_[i] = NULL;
       }

       /*
        * Create the root class of the link-sharing structure.
        */
       if ((ifd->root_ = rmc_newclass(0, ifd,
                                      psecPerByte,
                                      rmc_root_overlimit, maxq, 0, 0,
                                      maxidle, minidle, offtime,
                                      0, 0)) == NULL) {
               printf("rmc_init: root class not allocated\n");
               return ENOMEM;
       }
       ifd->root_->depth_ = 0;

       return 0;
}
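
/*
* A call sketch (illustrative assumptions: 'sc' and 'cbqrestart' are
* hypothetical driver names, and 80000 ps/byte corresponds to a
* 100Mb/s link):
*
*	error = rmc_init(ifq, &sc->ifd, 80000ULL, cbqrestart,
*	    30, 1, maxidle, minidle, offtime, RMCF_WRR);
*/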

/*
* void
* rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
*      mbuf 'm' to queue for resource class 'cl'.  This routine is called
*      by a driver's if_output routine.  This routine must be called with
*      output packet completion interrupts locked out (to avoid racing with
*      rmc_dequeue_next).
*
*      Returns:        0 on successful queueing
*                      -1 when packet drop occurs
*/
int
rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
{
       struct timespec  now;
       struct rm_ifdat *ifd = cl->ifdat_;
       int              cpri = cl->pri_;
       int              is_empty = qempty(cl->q_);

       RM_GETTIME(now);
       if (ifd->cutoff_ > 0) {
               if (TS_LT(&cl->undertime_, &now)) {
                       if (ifd->cutoff_ > cl->depth_)
                               ifd->cutoff_ = cl->depth_;
                       CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
               }
#if 1 /* ALTQ */
               else {
                       /*
                        * the class is overlimit. if the class has
                        * underlimit ancestors, set cutoff to the lowest
                        * depth among them.
                        */
                       struct rm_class *borrow = cl->borrow_;

                       while (borrow != NULL &&
                              borrow->depth_ < ifd->cutoff_) {
                               if (TS_LT(&borrow->undertime_, &now)) {
                                       ifd->cutoff_ = borrow->depth_;
                                       CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
                                       break;
                               }
                               borrow = borrow->borrow_;
                       }
               }
#else /* !ALTQ */
               else if ((ifd->cutoff_ > 1) && cl->borrow_) {
                       if (TS_LT(&cl->borrow_->undertime_, &now)) {
                               ifd->cutoff_ = cl->borrow_->depth_;
                               CBQTRACE(rmc_queue_packet, 'ffob',
                                        cl->borrow_->depth_);
                       }
               }
#endif /* !ALTQ */
       }

       if (_rmc_addq(cl, m) < 0)
               /* failed */
               return -1;

       if (is_empty) {
               CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
               ifd->na_[cpri]++;
       }

       if (qlen(cl->q_) > qlimit(cl->q_)) {
               /* note: qlimit can be set to 0 or 1 */
               rmc_drop_action(cl);
               return -1;
       }
       return 0;
}

/*
* void
* rmc_tl_satisfied(struct rm_ifdat *ifd, struct timespec *now) - Check all
*      classes to see if they are satisfied.
*/

static void
rmc_tl_satisfied(struct rm_ifdat *ifd, struct timespec *now)
{
       int              i;
       rm_class_t      *p, *bp;

       for (i = RM_MAXPRIO - 1; i >= 0; i--) {
               if ((bp = ifd->active_[i]) != NULL) {
                       p = bp;
                       do {
                               if (!rmc_satisfied(p, now)) {
                                       ifd->cutoff_ = p->depth_;
                                       return;
                               }
                               p = p->peer_;
                       } while (p != bp);
               }
       }

       reset_cutoff(ifd);
}

/*
* rmc_satisfied - Return 1 if the class is satisfied; 0, otherwise.
*/

static int
rmc_satisfied(struct rm_class *cl, struct timespec *now)
{
       rm_class_t      *p;

       if (cl == NULL)
               return 1;
       if (TS_LT(now, &cl->undertime_))
               return 1;
       if (cl->depth_ == 0) {
               if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
                       return 0;
               else
                       return 1;
       }
       if (cl->children_ != NULL) {
               p = cl->children_;
               while (p != NULL) {
                       if (!rmc_satisfied(p, now))
                               return 0;
                       p = p->next_;
               }
       }

       return 1;
}

/*
* Return 1 if class 'cl' is under limit or can borrow from a parent,
* 0 if overlimit.  As a side-effect, this routine will invoke the
* class overlimit action if the class is overlimit.
*/

static int
rmc_under_limit(struct rm_class *cl, struct timespec *now)
{
       rm_class_t      *p = cl;
       rm_class_t      *top;
       struct rm_ifdat *ifd = cl->ifdat_;
       int sleeping = cl->sleeping_;

       ifd->borrowed_[ifd->qi_] = NULL;
       /*
        * If cl is the root class, then always return that it is
        * underlimit.  Otherwise, check to see if the class is underlimit.
        */
       if (cl->parent_ == NULL)
               return 1;

       if (!TS_LT(now, &cl->undertime_)) {
               /* Fast path: the given class is allowed to send packets */
               if (sleeping) {
                       CALLOUT_STOP(&cl->callout_);
                       cl->sleeping_ = 0;
                       cl->undertime_.tv_sec = 0;
               }
               return 1;
       }

       top = NULL;
       do {
               if (((cl = cl->borrow_) == NULL) ||
                   (cl->depth_ > ifd->cutoff_)) {
                       /* No need to call the delay action */
                       if (sleeping)
                               return 0;
#ifdef ADJUST_CUTOFF
                       if (cl != NULL)
                               /* cutoff is taking effect, just
                                  return false without calling
                                  the delay action. */
                               return 0;
#endif
#ifdef BORROW_OFFTIME
                       /*
                        * check if the class can borrow offtime too.
                        * borrow offtime from the top of the borrow
                        * chain if the top class is not overloaded.
                        */
                       if (cl != NULL) {
                               /* cutoff is taking effect, use this class as top. */
                               top = cl;
                               CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
                       }
                       if (top != NULL && top->avgidle_ == top->minidle_)
                               top = NULL;
                       p->overtime_ = *now;
                       (p->overlimit)(p, top);
#else
                       p->overtime_ = *now;
                       (p->overlimit)(p, NULL);
#endif
                       return 0;
               }
               top = cl;
       } while (cl->undertime_.tv_sec && TS_LT(now, &cl->undertime_));

       if (cl != p)
               ifd->borrowed_[ifd->qi_] = cl;
       return 1;
}

/*
* _rmc_wrr_dequeue_next() - This is the scheduler for WRR as opposed to
*      packet-by-packet round robin.
*
* The heart of the weighted round-robin scheduler, which decides which
* class next gets to send a packet.  Highest priority first, then
* weighted round-robin within priorities.
*
* Each able-to-send class gets to send until its byte allocation is
* exhausted.  Thus, the active pointer is only changed after a class has
* exhausted its allocation.
*
* If the scheduler finds no class that is underlimit or able to borrow,
* then the first class found that had a nonzero queue and is allowed to
* borrow gets to send.
*/
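
/*
* A deficit sketch (numbers assumed for illustration): a class with
* w_allotment_ = 400 that queues 1500-byte packets overdraws to
* bytes_alloc_ = -1100 when it sends; on the next round adding 400
* still leaves it non-positive, so the first pass sets deficit = 1 and
* the second pass (deficit == 2) lets it transmit anyway.  The two-pass
* scheme thus guarantees a backlogged underlimit class at least one
* packet per round even when its weighted allotment is smaller than
* its packet size.
*/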

static mbuf_t *
_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
{
       struct rm_class *cl = NULL, *first = NULL;
       u_int            deficit;
       int              cpri;
       mbuf_t          *m;
       struct timespec  now;

       RM_GETTIME(now);

       /*
        * if the driver polls the top of the queue and then removes
        * the polled packet, we must return the same packet.
        */
       if (op == ALTDQ_REMOVE && ifd->pollcache_) {
               cl = ifd->pollcache_;
               cpri = cl->pri_;
               if (ifd->efficient_) {
                       /* check if this class is overlimit */
                       if (cl->undertime_.tv_sec != 0 &&
                           rmc_under_limit(cl, &now) == 0)
                               first = cl;
               }
               ifd->pollcache_ = NULL;
               goto _wrr_out;
	} else {
               /* mode == ALTDQ_POLL || pollcache == NULL */
               ifd->pollcache_ = NULL;
               ifd->borrowed_[ifd->qi_] = NULL;
       }
#ifdef ADJUST_CUTOFF
_again:
#endif
       for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
               if (ifd->na_[cpri] == 0)
                       continue;
               deficit = 0;
               /*
                * Loop through twice for a priority level, if some class
                * was unable to send a packet the first round because
                * of the weighted round-robin mechanism.
                * During the second loop at this level, deficit==2.
                * (This second loop is not needed if for every class,
                * "M[cl->pri_])" times "cl->allotment" is greater than
                * the byte size for the largest packet in the class.)
                */
_wrr_loop:
               cl = ifd->active_[cpri];
               ASSERT(cl != NULL);
               do {
                       if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
                               cl->bytes_alloc_ += cl->w_allotment_;
                       if (!qempty(cl->q_)) {
                               if ((cl->undertime_.tv_sec == 0) ||
                                   rmc_under_limit(cl, &now)) {
                                       if (cl->bytes_alloc_ > 0 || deficit > 1)
                                               goto _wrr_out;

                                       /* underlimit but no alloc */
                                       deficit = 1;
#if 1
                                       ifd->borrowed_[ifd->qi_] = NULL;
#endif
                               }
                               else if (first == NULL && cl->borrow_ != NULL)
                                       first = cl; /* borrowing candidate */
                       }

                       cl->bytes_alloc_ = 0;
                       cl = cl->peer_;
               } while (cl != ifd->active_[cpri]);

               if (deficit == 1) {
                       /* first loop found an underlimit class with deficit */
                       /* Loop on same priority level, with new deficit.  */
                       deficit = 2;
                       goto _wrr_loop;
               }
       }

#ifdef ADJUST_CUTOFF
       /*
        * no underlimit class found.  if cutoff is taking effect,
        * increase cutoff and try again.
        */
       if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
               ifd->cutoff_++;
               CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
               goto _again;
       }
#endif /* ADJUST_CUTOFF */
       /*
        * If LINK_EFFICIENCY is turned on, then the first overlimit
        * class we encounter will send a packet if all the classes
        * of the link-sharing structure are overlimit.
        */
       reset_cutoff(ifd);
       CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);

       if (!ifd->efficient_ || first == NULL)
               return NULL;

       cl = first;
       cpri = cl->pri_;
#if 0   /* too time-consuming for nothing */
       if (cl->sleeping_)
               CALLOUT_STOP(&cl->callout_);
       cl->sleeping_ = 0;
       cl->undertime_.tv_sec = 0;
#endif
       ifd->borrowed_[ifd->qi_] = cl->borrow_;
       ifd->cutoff_ = cl->borrow_->depth_;

       /*
	 * Dequeue the packet and do the bookkeeping...
        */
_wrr_out:
       if (op == ALTDQ_REMOVE) {
               m = _rmc_getq(cl);
               if (m == NULL)
                       panic("_rmc_wrr_dequeue_next");
               if (qempty(cl->q_))
                       ifd->na_[cpri]--;

               /*
                * Update class statistics and link data.
                */
               if (cl->bytes_alloc_ > 0)
                       cl->bytes_alloc_ -= m_pktlen(m);

               if ((cl->bytes_alloc_ <= 0) || first == cl)
                       ifd->active_[cl->pri_] = cl->peer_;
               else
                       ifd->active_[cl->pri_] = cl;

               ifd->class_[ifd->qi_] = cl;
               ifd->curlen_[ifd->qi_] = m_pktlen(m);
               ifd->now_[ifd->qi_] = now;
               ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
               ifd->queued_++;
       } else {
		/* mode == ALTDQ_POLL */
               m = _rmc_pollq(cl);
               ifd->pollcache_ = cl;
       }
       return m;
}

/*
* Dequeue & return next packet from the highest priority class that
* has a packet to send & has enough allocation to send it.  This
* routine is called by a driver whenever it needs a new packet to
* output.
*/
static mbuf_t *
_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
{
       mbuf_t          *m;
       int              cpri;
       struct rm_class *cl, *first = NULL;
       struct timespec  now;

       RM_GETTIME(now);

       /*
        * if the driver polls the top of the queue and then removes
        * the polled packet, we must return the same packet.
        */
       if (op == ALTDQ_REMOVE && ifd->pollcache_) {
               cl = ifd->pollcache_;
               cpri = cl->pri_;
               ifd->pollcache_ = NULL;
               goto _prr_out;
       } else {
               /* mode == ALTDQ_POLL || pollcache == NULL */
               ifd->pollcache_ = NULL;
               ifd->borrowed_[ifd->qi_] = NULL;
       }
#ifdef ADJUST_CUTOFF
_again:
#endif
       for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
               if (ifd->na_[cpri] == 0)
                       continue;
               cl = ifd->active_[cpri];
               ASSERT(cl != NULL);
               do {
                       if (!qempty(cl->q_)) {
                               if ((cl->undertime_.tv_sec == 0) ||
                                   rmc_under_limit(cl, &now))
                                       goto _prr_out;
                               if (first == NULL && cl->borrow_ != NULL)
                                       first = cl;
                       }
                       cl = cl->peer_;
               } while (cl != ifd->active_[cpri]);
       }

#ifdef ADJUST_CUTOFF
       /*
        * no underlimit class found.  if cutoff is taking effect, increase
        * cutoff and try again.
        */
       if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
               ifd->cutoff_++;
               goto _again;
       }
#endif /* ADJUST_CUTOFF */
       /*
        * If LINK_EFFICIENCY is turned on, then the first overlimit
        * class we encounter will send a packet if all the classes
        * of the link-sharing structure are overlimit.
        */
       reset_cutoff(ifd);
       if (!ifd->efficient_ || first == NULL)
               return NULL;

       cl = first;
       cpri = cl->pri_;
#if 0   /* too time-consuming for nothing */
       if (cl->sleeping_)
               CALLOUT_STOP(&cl->callout_);
       cl->sleeping_ = 0;
       cl->undertime_.tv_sec = 0;
#endif
       ifd->borrowed_[ifd->qi_] = cl->borrow_;
       ifd->cutoff_ = cl->borrow_->depth_;

       /*
	 * Dequeue the packet and do the bookkeeping...
        */
_prr_out:
       if (op == ALTDQ_REMOVE) {
               m = _rmc_getq(cl);
               if (m == NULL)
                       panic("_rmc_prr_dequeue_next");
               if (qempty(cl->q_))
                       ifd->na_[cpri]--;

               ifd->active_[cpri] = cl->peer_;

               ifd->class_[ifd->qi_] = cl;
               ifd->curlen_[ifd->qi_] = m_pktlen(m);
               ifd->now_[ifd->qi_] = now;
               ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
               ifd->queued_++;
       } else {
               /* mode == ALTDQ_POLL */
               m = _rmc_pollq(cl);
               ifd->pollcache_ = cl;
       }
       return m;
}

/*
* mbuf_t *
* rmc_dequeue_next(struct rm_ifdat *ifd, int mode) - this function
*      is invoked by the packet driver to get the next packet to be
*      dequeued and output on the link.  If WRR is enabled, then the
*      WRR dequeue next routine will determine the next packet to be sent.
*      Otherwise, packet-by-packet round robin is invoked.
*
*      Returns:        NULL, if a packet is not available or if all
*                      classes are overlimit.
*
*                      Otherwise, Pointer to the next packet.
*/

mbuf_t *
rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
{
       if (ifd->queued_ >= ifd->maxqueued_)
               return NULL;
       else if (ifd->wrr_)
               return (_rmc_wrr_dequeue_next(ifd, mode));
       else
               return (_rmc_prr_dequeue_next(ifd, mode));
}

/*
* Update the utilization estimate for the packet that just completed.
* The packet's class & the parent(s) of that class all get their
* estimators updated.  This routine is called by the driver's output-
* packet-completion interrupt service routine.
*/

/*
* a macro to approximate "divide by 1000" that gives 0.000999,
* if a value has enough effective digits.
* (on pentium, mul takes 9 cycles but div takes 46!)
*/
#define NSEC_TO_USEC(t) (((t) >> 10) + ((t) >> 16) + ((t) >> 17))
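/* e.g., NSEC_TO_USEC(1000000) = 976 + 15 + 7 = 998, within 0.2% of 1000 */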
/* Don't worry.  Recent compilers don't use div. */
#define PSEC_TO_USEC(t) ((t) / 1000 / 1000)
void
rmc_update_class_util(struct rm_ifdat *ifd)
{
       int64_t          idle, avgidle, pktlen;
       int64_t          pkt_time;
       int64_t          tidle;
       rm_class_t      *cl, *cl0, *borrowed;
       rm_class_t      *borrows;
       struct timespec *nowp;

       /*
        * Get the most recent completed class.
        */
       if ((cl = ifd->class_[ifd->qo_]) == NULL)
               return;

       cl0 = cl;
       pktlen = (int64_t)ifd->curlen_[ifd->qo_];
       borrowed = ifd->borrowed_[ifd->qo_];
       borrows = borrowed;

       PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);

       /*
        * Run estimator on class and its ancestors.
        */
       /*
	 * rmc_update_class_util is designed to be called when the
	 * transfer is completed from an xmit complete interrupt,
        * but most drivers don't implement an upcall for that.
        * so, just use estimated completion time.
        * as a result, ifd->qi_ and ifd->qo_ are always synced.
        */
       nowp = &ifd->now_[ifd->qo_];
	/* get pkt_time (for link) in nsec */
#if 1  /* use approximation */
       pkt_time = (int64_t)ifd->curlen_[ifd->qo_] * (int64_t)ifd->ps_per_byte_;
       pkt_time = PSEC_TO_NSEC(pkt_time);
#else
       pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
#endif
       if (ifd->ifq_->altq_ifp->if_type == IFT_PPP) {
               if (TS_LT(nowp, &ifd->ifnow_)) {
                       int iftime;

                       /*
                        * make sure the estimated completion time does not go
                        * too far.  it can happen when the link layer supports
                        * data compression or the interface speed is set to
                        * a much lower value.
                        */
                       TS_DELTA(&ifd->ifnow_, nowp, iftime);
                       if (iftime+pkt_time < ifd->maxiftime_) {
                               TS_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
                       } else {
                               TS_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
                       }
               } else {
                       TS_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
               }
       } else {
               if (TS_LT(nowp, &ifd->ifnow_)) {
                       TS_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
               } else {
                       TS_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
               }
       }

       while (cl != NULL) {
               TS_DELTA(&ifd->ifnow_, &cl->last_, idle);
               if (idle >= 2000000000)
                       /*
                        * this class is idle enough, reset avgidle.
                        * (TS_DELTA returns 2000000000 ns when delta is large.)
                        */
                       cl->avgidle_ = cl->maxidle_;

		/* get pkt_time (for class) in nsec */
#if 1  /* use approximation */
               pkt_time = pktlen * (int64_t)cl->ps_per_byte_;
               pkt_time = PSEC_TO_NSEC(pkt_time);
#else
               pkt_time = pktlen * cl->ns_per_byte_ / 1000;
#endif
               idle -= pkt_time;

               avgidle = cl->avgidle_;
               avgidle += idle - (avgidle >> RM_FILTER_GAIN);
               cl->avgidle_ = avgidle;
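		/*
		 * The update above is the scaled form of
		 *	avg <- (1 - 2^-RM_FILTER_GAIN)*avg + 2^-RM_FILTER_GAIN*idle
		 * since avgidle_ keeps the estimate scaled up by
		 * 2^RM_FILTER_GAIN.  E.g. (illustrative numbers, with
		 * RM_FILTER_GAIN = 5): avgidle = 3200 and idle = -800
		 * give 3200 + (-800 - 3200/32) = 2300.
		 */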

               /* Are we overlimit ? */
               if (avgidle <= 0) {
                       CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
#if 1 /* ALTQ */
                       /*
                        * need some lower bound for avgidle, otherwise
                        * a borrowing class gets unbounded penalty.
                        */
                       if (avgidle < cl->minidle_)
                               avgidle = cl->avgidle_ = cl->minidle_;
#endif
                       /* set next idle to make avgidle 0 */
                       tidle = pkt_time +
                               (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
                       TS_ADD_DELTA(nowp, tidle, &cl->undertime_);
                       ++cl->stats_.over;
               } else {
                       cl->avgidle_ =
                           (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
                       cl->undertime_.tv_sec = 0;
                       if (cl->sleeping_) {
                               CALLOUT_STOP(&cl->callout_);
                               cl->sleeping_ = 0;
                       }
               }

               if (borrows != NULL) {
                       if (borrows != cl)
                               ++cl->stats_.borrows;
                       else
                               borrows = NULL;
               }
               cl->last_ = ifd->ifnow_;
               cl->last_pkttime_ = pkt_time;

#if 1
               if (cl->parent_ == NULL && cl != cl0) {
                       /* take stats of root class */
                       PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
               }
#endif

               cl = cl->parent_;
       }

       /*
	 * Check to see if cutoff needs to be set to a new level.
        */
       cl = ifd->class_[ifd->qo_];
       if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
#if 1 /* ALTQ */
               if ((qlen(cl->q_) <= 0) || TS_LT(nowp, &borrowed->undertime_)) {
                       rmc_tl_satisfied(ifd, nowp);
                       CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
               } else {
                       ifd->cutoff_ = borrowed->depth_;
                       CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
               }
#else /* !ALTQ */
               if ((qlen(cl->q_) <= 1) || TS_LT(&now, &borrowed->undertime_)) {
                       reset_cutoff(ifd);
#ifdef notdef
                       rmc_tl_satisfied(ifd, &now);
#endif
                       CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
               } else {
                       ifd->cutoff_ = borrowed->depth_;
                       CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
               }
#endif /* !ALTQ */
       }

       /*
        * Release class slot
        */
       ifd->borrowed_[ifd->qo_] = NULL;
       ifd->class_[ifd->qo_] = NULL;
       ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
       ifd->queued_--;
}

/*
* void
* rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
*      over-limit action routines.  These get invoked by rmc_under_limit()
*      if a class with packets to send is over its bandwidth limit & can't
*      borrow from a parent class.
*
*      Returns: NONE
*/

static void
rmc_drop_action(struct rm_class *cl)
{
       struct rm_ifdat *ifd = cl->ifdat_;

       ASSERT(qlen(cl->q_) > 0);
       _rmc_dropq(cl);
       if (qempty(cl->q_))
               ifd->na_[cl->pri_]--;
}

void
rmc_dropall(struct rm_class *cl)
{
       struct rm_ifdat *ifd = cl->ifdat_;

       if (!qempty(cl->q_)) {
               _flushq(cl->q_);

               ifd->na_[cl->pri_]--;
       }
}

#if (__FreeBSD_version > 300000)
static int tvhzto(struct timeval *);

static int
tvhzto(struct timeval *tv)
{
       struct timeval t2;

       getmicrotime(&t2);
       t2.tv_sec = tv->tv_sec - t2.tv_sec;
       t2.tv_usec = tv->tv_usec - t2.tv_usec;
       return tvtohz(&t2);
}
#endif /* __FreeBSD_version > 300000 */

/*
* void
* rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
*      delay action routine.  It is invoked via rmc_under_limit when the
*      packet is discovered to be overlimit.
*
*      If the delay action is result of borrow class being overlimit, then
*      delay for the offtime of the borrowing class that is overlimit.
*
*      Returns: NONE
*/

void
rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
{
       int     t;
       int64_t ndelay, extradelay;

       cl->stats_.overactions++;
       if (borrow != NULL)
               TS_DELTA(&borrow->undertime_, &cl->overtime_, ndelay);
       else
               TS_DELTA(&cl->undertime_, &cl->overtime_, ndelay);
#ifndef BORROW_OFFTIME
       ndelay += cl->offtime_;
#endif

       if (!cl->sleeping_) {
               CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
#ifdef BORROW_OFFTIME
               if (borrow != NULL)
                       extradelay = borrow->offtime_;
               else
#endif
                       extradelay = cl->offtime_;

#ifdef ALTQ
               /*
                * XXX recalculate suspend time:
                * current undertime is (tidle + pkt_time) calculated
                * from the last transmission.
                *      tidle: time required to bring avgidle back to 0
                *      pkt_time: target waiting time for this class
                * we need to replace pkt_time by offtime
                */
               extradelay -= cl->last_pkttime_;
#endif
               if (extradelay > 0) {
                       TS_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
                       ndelay += extradelay;
               }

               cl->sleeping_ = 1;
               cl->stats_.delays++;

               /*
                * Since packets are phased randomly with respect to the
                * clock, 1 tick (the next clock tick) can be an arbitrarily
                * short time so we have to wait for at least two ticks.
                * NOTE:  If there's no other traffic, we need the timer as
                * a 'backstop' to restart this class.
                */
               if (NSEC_TO_USEC(ndelay) > tick * 2) {
#ifdef __FreeBSD__
                       /* FreeBSD rounds up the tick */
                       t = tvhzto(&cl->undertime_);
#else
                       /* other BSDs round down the tick */
                       t = tshzto(&cl->undertime_) + 1;
#endif
               } else
                       t = 2;
               CALLOUT_RESET(&cl->callout_, t,
                             (timeout_t *)rmc_restart, (void *)cl);
       }
}

/*
* void
* rmc_restart() - is just a helper routine for rmc_delay_action -- it is
*      called by the system timer code & is responsible for checking if the
*      class is still sleeping (it might have been restarted as a side
*      effect of the queue scan on a packet arrival) and, if so, restarting
*      output for the class.  Inspecting the class state & restarting output
*      require locking the class structure.  In general the driver is
*      responsible for locking but this is the only routine that is not
*      called directly or indirectly from the interface driver so it has to
*      know about system locking conventions.  Under BSD, locking is done
*      by raising IPL to splnet so that's what's implemented here.  On a
*      different system this would probably need to be changed.
*
*      Returns:        NONE
*/

static void
rmc_restart(struct rm_class *cl)
{
       struct rm_ifdat *ifd = cl->ifdat_;
       int              s;

       s = splnet();
       if (cl->sleeping_) {
               cl->sleeping_ = 0;
               cl->undertime_.tv_sec = 0;

               if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
                       CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
                       (ifd->restart)(ifd->ifq_);
               }
       }
       splx(s);
}

/*
* void
* rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow) - This is
*      the generic overlimit handling routine for the root class of the
*      link sharing structure.
*
*      Returns: NONE
*/

static void
rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)
{
       panic("rmc_root_overlimit");
}

/*
* Packet Queue handling routines.  Eventually, this is to localize the
*      effects on the code of whether queues are RED queues or droptail
*      queues.
*/

static int
_rmc_addq(rm_class_t *cl, mbuf_t *m)
{
#ifdef ALTQ_RIO
       if (q_is_rio(cl->q_))
               return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
#endif
#ifdef ALTQ_RED
       if (q_is_red(cl->q_))
               return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
#endif /* ALTQ_RED */

       if (cl->flags_ & RMCF_CLEARDSCP)
               write_dsfield(m, cl->pktattr_, 0);

       _addq(cl->q_, m);
       return 0;
}

/* note: _rmc_dropq is not called for red */
static void
_rmc_dropq(rm_class_t *cl)
{
       mbuf_t  *m;

       if ((m = _getq(cl->q_)) != NULL)
               m_freem(m);
}

static mbuf_t *
_rmc_getq(rm_class_t *cl)
{
#ifdef ALTQ_RIO
       if (q_is_rio(cl->q_))
               return rio_getq((rio_t *)cl->red_, cl->q_);
#endif
#ifdef ALTQ_RED
       if (q_is_red(cl->q_))
               return red_getq(cl->red_, cl->q_);
#endif
       return _getq(cl->q_);
}

static mbuf_t *
_rmc_pollq(rm_class_t *cl)
{
       return qhead(cl->q_);
}

#ifdef CBQ_TRACE

struct cbqtrace          cbqtrace_buffer[NCBQTRACE+1];
struct cbqtrace         *cbqtrace_ptr = NULL;
int                      cbqtrace_count;

/*
* DDB hook to trace cbq events:
*  the last 1024 events are held in a circular buffer.
*  use "call cbqtrace_dump(N)" to display 20 events from Nth event.
*/
void cbqtrace_dump(int);
static char *rmc_funcname(void *);

static struct rmc_funcs {
       void    *func;
       char    *name;
} rmc_funcs[] =
{
	{ rmc_init,		"rmc_init" },
	{ rmc_queue_packet,	"rmc_queue_packet" },
	{ rmc_under_limit,	"rmc_under_limit" },
	{ rmc_update_class_util, "rmc_update_class_util" },
	{ rmc_delay_action,	"rmc_delay_action" },
	{ rmc_restart,		"rmc_restart" },
	{ _rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next" },
	{ NULL,			NULL }
};

static char *
rmc_funcname(void *func)
{
       struct rmc_funcs *fp;

       for (fp = rmc_funcs; fp->func != NULL; fp++)
               if (fp->func == func)
                       return (fp->name);
       return ("unknown");
}

void
cbqtrace_dump(int counter)
{
       int      i, *p;
       char    *cp;

       counter = counter % NCBQTRACE;
       p = (int *)&cbqtrace_buffer[counter];

       for (i=0; i<20; i++) {
               printf("[0x%x] ", *p++);
               printf("%s: ", rmc_funcname((void *)*p++));
               cp = (char *)p++;
               printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
               printf("%d\n",*p++);

               if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
                       p = (int *)cbqtrace_buffer;
       }
}
#endif /* CBQ_TRACE */
#endif /* ALTQ_CBQ */

#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
#if !defined(__GNUC__) || defined(ALTQ_DEBUG)
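
/*
* These queues are kept as circular, singly-linked lists of mbufs:
* qtail(q) points to the most recently added packet, and its m_nextpkt
* field points back around to the head of the queue, so both enqueue at
* the tail and dequeue at the head are O(1).
*/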

void
_addq(class_queue_t *q, mbuf_t *m)
{
       mbuf_t  *m0;

       if ((m0 = qtail(q)) != NULL)
               m->m_nextpkt = m0->m_nextpkt;
       else
               m0 = m;
       m0->m_nextpkt = m;
       qtail(q) = m;
       qlen(q)++;
}

mbuf_t *
_getq(class_queue_t *q)
{
       mbuf_t  *m, *m0;

       if ((m = qtail(q)) == NULL)
               return NULL;
       if ((m0 = m->m_nextpkt) != m)
               m->m_nextpkt = m0->m_nextpkt;
       else {
               ASSERT(qlen(q) == 1);
               qtail(q) = NULL;
       }
       qlen(q)--;
       m0->m_nextpkt = NULL;
       return m0;
}

/* drop a packet at the tail of the queue */
mbuf_t *
_getq_tail(class_queue_t *q)
{
       mbuf_t  *m, *m0, *prev;

       if ((m = m0 = qtail(q)) == NULL)
               return NULL;
       do {
               prev = m0;
               m0 = m0->m_nextpkt;
       } while (m0 != m);
       prev->m_nextpkt = m->m_nextpkt;
       if (prev == m)  {
               ASSERT(qlen(q) == 1);
               qtail(q) = NULL;
       } else
               qtail(q) = prev;
       qlen(q)--;
       m->m_nextpkt = NULL;
       return m;
}

/* randomly select a packet in the queue */
mbuf_t *
_getq_random(class_queue_t *q)
{
       struct mbuf     *m;
       int              i, n;

       if ((m = qtail(q)) == NULL)
               return NULL;
       if (m->m_nextpkt == m) {
               ASSERT(qlen(q) == 1);
               qtail(q) = NULL;
       } else {
               struct mbuf *prev = NULL;

               n = cprng_fast32() % qlen(q) + 1;
               for (i = 0; i < n; i++) {
                       prev = m;
                       m = m->m_nextpkt;
               }
               prev->m_nextpkt = m->m_nextpkt;
               if (m == qtail(q))
                       qtail(q) = prev;
       }
       qlen(q)--;
       m->m_nextpkt = NULL;
       return m;
}

void
_removeq(class_queue_t *q, mbuf_t *m)
{
       mbuf_t  *m0, *prev;

       m0 = qtail(q);
       do {
               prev = m0;
               m0 = m0->m_nextpkt;
       } while (m0 != m);
       prev->m_nextpkt = m->m_nextpkt;
       if (prev == m)
               qtail(q) = NULL;
       else if (qtail(q) == m)
               qtail(q) = prev;
       qlen(q)--;
}

void
_flushq(class_queue_t *q)
{
       mbuf_t *m;

       while ((m = _getq(q)) != NULL)
               m_freem(m);
       ASSERT(qlen(q) == 0);
}

#endif /* !__GNUC__ || ALTQ_DEBUG */
#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */