/*
* Copyright (c) 1990, 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from the Stanford/CMU enet packet filter,
* (net/enet.c) distributed as part of 4.3BSD, and code contributed
* to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
* Berkeley Laboratory.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)bpf.c 8.4 (Berkeley) 1/9/95
* static char rcsid[] =
* "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp ";
*/
#ifndef BPF_BUFSIZE
/*
* 4096 is too small for FDDI frames. 8192 is too small for gigabit Ethernet
* jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k).
*/
# define BPF_BUFSIZE 32768
#endif
#define PRINET 26 /* interruptible */
/*
* The default read buffer size, and limit for BIOCSBLEN, is sysctl'able.
* XXX the default values should be computed dynamically based
* on available memory size and available mbuf clusters.
*/
/* Default read buffer size; starts out as the compile-time BPF_BUFSIZE. */
static int bpf_bufsize = BPF_BUFSIZE;
static int bpf_maxbufsize = BPF_DFLTBUFSIZE; /* XXX set dynamically, see above */
/* When true, bpf_setf() calls bpf_jit_generate() for newly installed filters. */
static bool bpf_jit = false;
/*
* Locking notes:
* - bpf_mtx (adaptive mutex) protects:
* - Global lists: bpf_iflist and bpf_dlist
* - struct bpf_if
* - bpf_close
* - bpf_psz (pserialize)
* - struct bpf_d has two mutexes:
* - bd_buf_mtx (spin mutex) protects the buffers that can be accessed
* on packet tapping
* - bd_mtx (adaptive mutex) protects member variables other than the buffers
* - Locking order: bpf_mtx => bpf_d#bd_mtx => bpf_d#bd_buf_mtx
* - struct bpf_d obtained via fp->f_bpf in bpf_read and bpf_write is
* never freed because struct bpf_d is only freed in bpf_close and
* bpf_close is never called while bpf_read or bpf_write is executing
* - A filter that is assigned to bpf_d can be replaced with another filter
* while tapping packets, so it needs to be done atomically
* - struct bpf_d is iterated on bpf_dlist with psz
* - struct bpf_if is iterated on bpf_iflist with psz or psref
*/
/*
* Use a mutex to avoid a race condition between gathering the stats/peers
* and opening/closing the device.
*/
static kmutex_t bpf_mtx;
/*
* bpf_iflist is the list of interfaces; each corresponds to an ifnet
* bpf_dlist is the list of open descriptors (struct bpf_d)
*/
static struct pslist_head bpf_iflist;
static struct pslist_head bpf_dlist;
/*
* Build a sockaddr based on the data link layer type.
* We do this at this level because the ethernet header
* is copied directly into the data field of the sockaddr.
* In the case of SLIP, there is no header and the packet
* is forwarded as is.
* Also, we are careful to leave room at the front of the mbuf
* for the link level header.
*/
switch (linktype) {
case DLT_NULL:
sockp->sa_family = AF_UNSPEC;
if (ifp->if_type == IFT_LOOP) {
/* Set here to apply the following validations */
hlen = sizeof(uint32_t);
} else
hlen = 0;
align = 0;
break;
default:
return (EIO);
}
len = uio->uio_resid;
/*
* If there aren't enough bytes for a link level header or the
* packet length exceeds the interface mtu, return an error.
*/
if (len - hlen > mtu)
return (EMSGSIZE);
if (hlen != 0) {
if (linktype == DLT_NULL && ifp->if_type == IFT_LOOP) {
uint32_t af;
/* the link header indicates the address family */
memcpy(&af, mtod(m0, void *), sizeof(af));
sockp->sa_family = af;
} else {
/* move link level header in the top of mbuf to sa_data */
memcpy(sockp->sa_data, mtod(m0, void *), hlen);
}
m0->m_data += hlen;
m0->m_len -= hlen;
}
m_claimm(m, ifp->if_mowner);
*mp = m0;
return (0);
bad:
m_freem(m0);
return (error);
}
/*
* Attach file to the bpf interface, i.e. make d listen on bp.
*/
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
struct bpf_event_tracker *t;
KASSERT(mutex_owned(&bpf_mtx));
KASSERT(mutex_owned(d->bd_mtx));
/*
* Point d at bp, and add d to the interface's list of listeners.
* Finally, point the driver's bpf cookie at the interface so
* it will divert packets to bpf.
*/
d->bd_bif = bp;
BPFIF_DLIST_WRITER_INSERT_HEAD(bp, d);
bp = d->bd_bif;
/*
* Check if this descriptor had requested promiscuous mode.
* If so, turn it off.
*/
if (d->bd_promisc) {
int error __diagused;
d->bd_promisc = 0;
/*
* Take device out of promiscuous mode. Since we were
* able to enter promiscuous mode, we should be able
* to turn it off. But we can get an error if
* the interface was configured down, so don't panic;
* just report the error on DIAGNOSTIC kernels.
*/
KERNEL_LOCK_UNLESS_NET_MPSAFE();
error = ifpromisc(bp->bif_ifp, 0);
KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
#ifdef DIAGNOSTIC
if (error)
printf("%s: ifpromisc failed: %d", __func__, error);
#endif
}
/* Remove d from the interface's descriptor list. */
BPFIF_DLIST_WRITER_REMOVE(d);
pserialize_perform(bpf_psz);
if (BPFIF_DLIST_WRITER_EMPTY(bp)) {
/*
* Let the driver know that there are no more listeners.
*/
*d->bd_bif->bif_driverp = NULL;
}
/*
 * bpfilterattach() is the boot-time attach hook.  It is deliberately a
 * no-op: any initialization happens as part of the module init code.
 *
 * n	unused
 */
/* ARGSUSED */
void
bpfilterattach(int n)
{
	/* Nothing to set up here; just mark the argument as unused. */
	(void)n;
}
/*
* Open ethernet device. Clones.
*/
/* ARGSUSED */
int
bpfopen(dev_t dev, int flag, int mode, struct lwp *l)
{
struct bpf_d *d;
struct file *fp;
int error, fd;
/* fd_allocfile() will fill in the descriptor for us. */
if ((error = fd_allocfile(&fp, &fd)) != 0)
return error;
/*
* Close the descriptor by detaching it from its interface,
* deallocating its buffers, and marking it free.
*/
/* ARGSUSED */
static int
bpf_close(struct file *fp)
{
struct bpf_d *d;
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a
 * single statement and stays correct under an unbraced if/else.
 * d is evaluated several times; pass an expression without side effects.
 */
#define ROTATE_BUFFERS(d) do { \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL; \
} while (0)
/*
* bpfread - read next chunk of packets from buffers
*/
static int
bpf_read(struct file *fp, off_t *offp, struct uio *uio,
kauth_cred_t cred, int flags)
{
struct bpf_d *d = fp->f_bpf;
int timed_out;
int error;
/*
* Refresh the PID associated with this bpf file.
*/
d->bd_pid = curproc->p_pid;
getnanotime(&d->bd_atime);
/*
* Restrict application to use a buffer the same size as
* the kernel buffers.
*/
if (uio->uio_resid != d->bd_bufsize)
return (EINVAL);
mutex_enter(d->bd_buf_mtx);
if (d->bd_state == BPF_WAITING)
callout_halt(&d->bd_callout, d->bd_buf_mtx);
timed_out = (d->bd_state == BPF_TIMED_OUT);
d->bd_state = BPF_IDLE;
mutex_exit(d->bd_buf_mtx);
/*
* If the hold buffer is empty, then do a timed sleep, which
* ends when the timeout expires or when enough packets
* have arrived to fill the store buffer.
*/
mutex_enter(d->bd_buf_mtx);
while (d->bd_hbuf == NULL) {
if (fp->f_flag & FNONBLOCK) {
if (d->bd_slen == 0) {
error = EWOULDBLOCK;
goto out;
}
ROTATE_BUFFERS(d);
break;
}
if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
/*
* A packet(s) either arrived since the previous
* read or arrived while we were asleep.
* Rotate the buffers and return what's here.
*/
ROTATE_BUFFERS(d);
break;
}
if (error == EINTR || error == ERESTART)
goto out;
if (error == EWOULDBLOCK) {
/*
* On a timeout, return what's in the buffer,
* which may be nothing. If there is something
* in the store buffer, we can rotate the buffers.
*/
if (d->bd_hbuf)
/*
* We filled up the buffer in between
* getting the timeout and arriving
* here, so we don't need to rotate.
*/
break;
if (d->bd_slen == 0) {
error = 0;
goto out;
}
ROTATE_BUFFERS(d);
break;
}
if (error != 0)
goto out;
}
/*
* At this point, we know we have something in the hold slot.
*/
mutex_exit(d->bd_buf_mtx);
/*
* Move data from hold buffer into user space.
* We know the entire buffer is transferred since
* we checked above that the read buffer is bpf_bufsize bytes.
*/
error = uiomove(d->bd_hbuf, d->bd_hlen, uio);
/*
* If writing to a loopback interface, the address family has
* already been specially computed in bpf_movein(), so don't
* clobber it, or the loopback will reject it in looutput().
*/
if (d->bd_hdrcmplt && ifp->if_type != IFT_LOOP)
dst.ss_family = pseudo_AF_HDRCMPLT;
if (d->bd_feedback) {
mc = m_dup(m, 0, M_COPYALL, M_NOWAIT);
if (mc != NULL)
m_set_rcvif(mc, ifp);
/* Set M_PROMISC for outgoing packets to be discarded. */
if (1 /*d->bd_direction == BPF_D_INOUT*/)
m->m_flags |= M_PROMISC;
} else
mc = NULL;
error = if_output_lock(ifp, ifp, m, (struct sockaddr *) &dst, NULL);
if (mc != NULL) {
if (error == 0) {
int s = splsoftnet();
KERNEL_LOCK_UNLESS_IFP_MPSAFE(ifp);
ifp->_if_input(ifp, mc);
KERNEL_UNLOCK_UNLESS_IFP_MPSAFE(ifp);
splx(s);
} else
m_freem(mc);
}
/*
* The driver frees the mbuf.
*/
out:
bpf_if_release(bp, &psref);
out_bindx:
curlwp_bindx(bound);
return error;
}
/*
* Reset a descriptor by flushing its packet buffer and clearing the
* receive and drop counts.
*/
static void
reset_d(struct bpf_d *d)
{
/*
* FIONREAD Check for read packet available.
* BIOCGBLEN Get buffer len [for read()].
* BIOCSETF Set ethernet read filter.
* BIOCFLUSH Flush read packet buffer.
* BIOCPROMISC Put interface into promiscuous mode.
* BIOCGDLT Get link layer type.
* BIOCGETIF Get interface name.
* BIOCSETIF Set interface.
* BIOCSRTIMEOUT Set read timeout.
* BIOCGRTIMEOUT Get read timeout.
* BIOCGSTATS Get packet stats.
* BIOCIMMEDIATE Set immediate mode.
* BIOCVERSION Get filter language version.
* BIOCGHDRCMPLT Get "header already complete" flag.
* BIOCSHDRCMPLT Set "header already complete" flag.
* BIOCSFEEDBACK Set packet feedback mode.
* BIOCGFEEDBACK Get packet feedback mode.
* BIOCGDIRECTION Get packet direction flag
* BIOCSDIRECTION Set packet direction flag
*/
/* ARGSUSED */
static int
bpf_ioctl(struct file *fp, u_long cmd, void *addr)
{
struct bpf_d *d = fp->f_bpf;
int error = 0;
/*
* Refresh the PID associated with this bpf file.
*/
d->bd_pid = curproc->p_pid;
#ifdef _LP64
if (curproc->p_flag & PK_32)
d->bd_compat32 = 1;
else
d->bd_compat32 = 0;
#endif
/*
* Put interface into promiscuous mode.
*/
case BIOCPROMISC:
mutex_enter(d->bd_mtx);
if (d->bd_bif == NULL) {
mutex_exit(d->bd_mtx);
/*
* No interface attached yet.
*/
error = EINVAL;
break;
}
if (d->bd_promisc == 0) {
KERNEL_LOCK_UNLESS_NET_MPSAFE();
error = ifpromisc(d->bd_bif->bif_ifp, 1);
KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
if (error == 0)
d->bd_promisc = 1;
}
mutex_exit(d->bd_mtx);
break;
/*
* Get device parameters.
*/
case BIOCGDLT:
mutex_enter(d->bd_mtx);
if (d->bd_bif == NULL)
error = EINVAL;
else
*(u_int *)addr = d->bd_bif->bif_dlt;
mutex_exit(d->bd_mtx);
break;
/*
* Get a list of supported device parameters.
*/
case BIOCGDLTLIST:
mutex_enter(d->bd_mtx);
if (d->bd_bif == NULL)
error = EINVAL;
else
error = bpf_getdltlist(d, addr);
mutex_exit(d->bd_mtx);
break;
/*
* Set device parameters.
*/
case BIOCSDLT:
mutex_enter(&bpf_mtx);
mutex_enter(d->bd_mtx);
if (d->bd_bif == NULL)
error = EINVAL;
else
error = bpf_setdlt(d, *(u_int *)addr);
mutex_exit(d->bd_mtx);
mutex_exit(&bpf_mtx);
break;
/*
* Get interface name.
*/
#ifdef OBIOCGETIF
case OBIOCGETIF:
#endif
case BIOCGETIF:
mutex_enter(d->bd_mtx);
if (d->bd_bif == NULL)
error = EINVAL;
else
bpf_ifname(d->bd_bif->bif_ifp, addr);
mutex_exit(d->bd_mtx);
break;
/*
* Set interface.
*/
#ifdef OBIOCSETIF
case OBIOCSETIF:
#endif
case BIOCSETIF:
mutex_enter(&bpf_mtx);
error = bpf_setif(d, addr);
mutex_exit(&bpf_mtx);
break;
/*
* Set read timeout.
*/
case BIOCSRTIMEOUT: {
struct timeval *tv = addr;
case BIOCGHDRCMPLT: /* get "header already complete" flag */
*(u_int *)addr = d->bd_hdrcmplt;
break;
case BIOCSHDRCMPLT: /* set "header already complete" flag */
d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
break;
/*
* Get packet direction flag
*/
case BIOCGDIRECTION:
*(u_int *)addr = d->bd_direction;
break;
/*
* Set packet direction flag
*/
case BIOCSDIRECTION: {
u_int direction;
direction = *(u_int *)addr;
switch (direction) {
case BPF_D_IN:
case BPF_D_INOUT:
case BPF_D_OUT:
d->bd_direction = direction;
break;
default:
error = EINVAL;
}
}
break;
/*
* Set "feed packets from bpf back to input" mode
*/
case BIOCSFEEDBACK:
d->bd_feedback = *(u_int *)addr;
break;
/*
* Get "feed packets from bpf back to input" mode
*/
case BIOCGFEEDBACK:
*(u_int *)addr = d->bd_feedback;
break;
case FIONBIO: /* Non-blocking I/O */
/*
* No need to do anything special as we use FNONBLOCK in
* bpf_read() as an indication of whether or not to block
* the read.
*/
break;
case FIOASYNC: /* Send signal on receive packets */
mutex_enter(d->bd_mtx);
d->bd_async = *(int *)addr;
mutex_exit(d->bd_mtx);
break;
case TIOCSPGRP: /* Process or group to send signals to */
case FIOSETOWN:
error = fsetown(&d->bd_pgid, cmd, addr);
break;
case TIOCGPGRP:
case FIOGETOWN:
error = fgetown(d->bd_pgid, cmd, addr);
break;
}
return (error);
}
/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 *
 * d	descriptor whose filter is replaced
 * fp	user-supplied program; fp->bf_insns is copied in from userspace
 *	and validated with bpf_validate()
 * cmd	BIOCSETWF replaces the write filter, anything else replaces the
 *	read filter; BIOCSETF additionally records the JIT code in
 *	d->bd_jitcode
 *
 * Returns 0 on success, or EINVAL if the program cannot be copied in or
 * fails validation.  An empty program (length 0) installs a filter with
 * no instructions.
 */
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
{
	struct bpf_insn *fcode = NULL;
	/* Stays NULL unless JIT is enabled and the program is non-empty. */
	bpfjit_func_t jcode = NULL;
	/* Instruction count comes from the request itself. */
	size_t flen = fp->bf_len;
	size_t size = 0;
	struct bpf_filter *oldf, *newf, **storef;

	/* A non-empty program without an instruction array is bogus. */
	if (fp->bf_insns == NULL && flen != 0)
		return EINVAL;

	/*
	 * NOTE(review): no upper bound on flen is enforced in this block;
	 * confirm that a maximum instruction count is checked somewhere.
	 */
	if (flen != 0) {
		/*
		 * Allocate the buffer, copy the byte-code from
		 * userspace and validate it.
		 */
		size = flen * sizeof(*fp->bf_insns);
		fcode = kmem_alloc(size, KM_SLEEP);
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    !bpf_validate(fcode, (int)flen)) {
			kmem_free(fcode, size);
			return EINVAL;
		}
		if (bpf_jit)
			jcode = bpf_jit_generate(NULL, fcode, flen);
	}

	newf = kmem_alloc(sizeof(*newf), KM_SLEEP);
	newf->bf_insn = fcode;
	newf->bf_size = size;
	newf->bf_jitcode = jcode;
	if (cmd == BIOCSETF)
		d->bd_jitcode = jcode; /* XXX just for kvm(3) users */

	/* Need to hold bpf_mtx for pserialize_perform */
	mutex_enter(&bpf_mtx);
	mutex_enter(d->bd_mtx);
	if (cmd == BIOCSETWF) {
		oldf = d->bd_wfilter;
		storef = &d->bd_wfilter;
	} else {
		oldf = d->bd_rfilter;
		storef = &d->bd_rfilter;
	}
	/* Publish the new filter atomically; it may be in use by packet taps. */
	atomic_store_release(storef, newf);
	reset_d(d);
	/* Wait for pserialize readers before the old filter is freed below. */
	pserialize_perform(bpf_psz);
	mutex_exit(d->bd_mtx);
	mutex_exit(&bpf_mtx);

	if (oldf != NULL)
		bpf_free_filter(oldf);

	return 0;
}
/*
* Detach a file from its current interface (if attached at all) and attach
* to the interface indicated by the name stored in ifr.
* Return an errno or 0.
*/
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
struct bpf_if *bp;
char *cp;
int unit_seen, i, error;
KASSERT(mutex_owned(&bpf_mtx));
/*
* Make sure the provided name has a unit number, and default
* it to '0' if not specified.
* XXX This is ugly ... do this differently?
*/
unit_seen = 0;
cp = ifr->ifr_name;
cp[sizeof(ifr->ifr_name) - 1] = '\0'; /* sanity */
while (*cp++)
if (*cp >= '0' && *cp <= '9')
unit_seen = 1;
if (!unit_seen) {
/* Make sure to leave room for the '\0'. */
for (i = 0; i < (IFNAMSIZ - 1); ++i) {
if ((ifr->ifr_name[i] >= 'a' &&
ifr->ifr_name[i] <= 'z') ||
(ifr->ifr_name[i] >= 'A' &&
ifr->ifr_name[i] <= 'Z'))
continue;
ifr->ifr_name[i] = '0';
}
}
/*
* Look through attached interfaces for the named one.
*/
BPF_IFLIST_WRITER_FOREACH(bp) {
struct ifnet *ifp = bp->bif_ifp;
if (ifp == NULL ||
strcmp(ifp->if_xname, ifr->ifr_name) != 0)
continue;
/* skip additional entry */
if (bp->bif_driverp != &ifp->if_bpf)
continue;
/*
* We found the requested interface.
* Allocate the packet buffers if we need to.
* If we're already attached to requested interface,
* just flush the buffer.
*/
/*
* bpf_allocbufs is called only here. bpf_mtx ensures that
* no race condition happen on d->bd_sbuf.
*/
if (d->bd_sbuf == NULL) {
error = bpf_allocbufs(d);
if (error != 0)
return (error);
}
mutex_enter(d->bd_mtx);
if (bp != d->bd_bif) {
if (d->bd_bif) {
/*
* Detach if attached to something else.
*/
bpf_detachd(d);
BPFIF_DLIST_ENTRY_INIT(d);
}
/*
* Support for poll() system call
*
* Return true iff the specific operation will not block indefinitely - with
* the assumption that it is safe to positively acknowledge a request for the
* ability to write to the BPF device.
* Otherwise, return false but make a note that a selnotify() must be done.
*/
static int
bpf_poll(struct file *fp, int events)
{
struct bpf_d *d = fp->f_bpf;
int revents;
/*
* Refresh the PID associated with this bpf file.
*/
mutex_enter(&bpf_mtx);
d->bd_pid = curproc->p_pid;
/*
 * Copy len bytes out of the mbuf chain that starts at src_arg into the
 * flat buffer dst_arg.  This code is derived from m_copydata in
 * sys/uipc_mbuf.c.
 *
 * dst_arg	destination buffer
 * src_arg	first mbuf of the chain (passed as const void * so the
 *		function has a memcpy-compatible signature)
 * len		number of bytes to copy
 *
 * Panics if the chain ends before len bytes have been copied.
 * Returns dst_arg.
 */
static void *
bpf_mcpy(void *dst_arg, const void *src_arg, size_t len)
{
	const struct mbuf *cur = src_arg;
	u_char *out = dst_arg;
	u_int chunk;

	/* Walk the chain, taking at most the remaining length from each mbuf. */
	for (; len > 0; cur = cur->m_next) {
		if (cur == NULL)
			panic("bpf_mcpy");
		chunk = uimin(cur->m_len, len);
		memcpy(out, mtod(cur, const void *), chunk);
		out += chunk;
		len -= chunk;
	}

	return dst_arg;
}
/*
* Dispatch a packet to all the listeners on interface bp.
*
* pkt pointer to the packet, either a data buffer or an mbuf chain
* buflen buffer length, if pkt is a data buffer
* cpfn a function that can copy pkt into the listener's buffer
* pktlen length of the packet
* direction BPF_D_IN or BPF_D_OUT
*/
static inline void
bpf_deliver(struct bpf_if *bp, void *(*cpfn)(void *, const void *, size_t),
void *pkt, u_int pktlen, u_int buflen, const u_int direction)
{
bool gottime = false;
struct timespec ts;
struct bpf_d *d;
int s;
u_int slen;
KASSERT(!cpu_intr_p());
/*
* Note that the IPL does not have to be raised at this point.
* The only problem that could arise here is that if two different
* interfaces shared any data. This is not the case.
*/
s = pserialize_read_enter();
BPFIF_DLIST_READER_FOREACH(d, bp) {
if (direction == BPF_D_IN) {
if (d->bd_direction == BPF_D_OUT)
continue;
} else { /* BPF_D_OUT */
if (d->bd_direction == BPF_D_IN)
continue;
}
/*
* Incoming linkage from device drivers, when the head of the packet is in
* a buffer, and the tail is in an mbuf chain.
*/
static void
_bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m,
u_int direction)
{
u_int pktlen;
struct mbuf mb;
/*
* Craft on-stack mbuf suitable for passing to bpf_filter.
* Note that we cut corners here; we only set up what's
* absolutely needed--this mbuf should never go anywhere else.
*/
(void)memset(&mb, 0, sizeof(mb));
mb.m_type = MT_DATA;
mb.m_next = m;
mb.m_data = data;
mb.m_len = dlen;
/*
* We need to prepend the address family as
* a four byte field. Cons up a dummy header
* to pacify bpf. This is safe because bpf
* will only read from the mbuf (i.e., it won't
* try to free it or keep a pointer to it).
*/
static void
_bpf_mtap_af(struct bpf_if *bp, uint32_t af, struct mbuf *m, u_int direction)
{
struct mbuf m0;
/*
* Put the SLIP pseudo-"link header" in place.
* Note this M_PREPEND() should never fail,
* since we know we always have enough space
* in the input buffer.
*/
static void
_bpf_mtap_sl_in(struct bpf_if *bp, u_char *chdr, struct mbuf **m)
{
u_char *hp;
M_PREPEND(*m, SLIP_HDRLEN, M_DONTWAIT);
if (*m == NULL)
return;
/*
* Put the SLIP pseudo-"link header" in
* place. The compressed header is now
* at the beginning of the mbuf.
*/
static void
_bpf_mtap_sl_out(struct bpf_if *bp, u_char *chdr, struct mbuf *m)
{
struct mbuf m0;
u_char *hp;
/* XXX NOMPSAFE: assumed running on one CPU */
s = splnet();
m = bp->bif_mbuf_head;
if (m != NULL) {
bp->bif_mbuf_head = m->m_nextpkt;
m->m_nextpkt = NULL;
if (bp->bif_mbuf_head == NULL)
bp->bif_mbuf_tail = NULL;
#ifdef BPF_MTAP_SOFTINT_DEBUG
log(LOG_DEBUG, "%s: dequeued mbuf=%p from %s\n",
__func__, m, bp->bif_ifp->if_xname);
#endif
}
splx(s);
/* To avoid extra invocations of the softint */
if (BPFIF_DLIST_READER_EMPTY(bp))
return;
KASSERT(bp->bif_si != NULL);
dup = bpf_mbuf_enqueue(bp, m);
if (dup != NULL)
softint_schedule(bp->bif_si);
}
/*
 * Return the length of the bpf header used for packets captured on
 * descriptor d, including any padding.
 */
static int
bpf_hdrlen(struct bpf_d *d)
{
	const int linkhdr = d->bd_bif->bif_hdrlen;

	/*
	 * The result is not necessarily SIZEOF_BPF_HDR: padding is added so
	 * that bpf header plus link header ends on a word boundary, which
	 * puts the network layer header on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
#ifdef _LP64
	if (d->bd_compat32) {
		/* 32-bit compat descriptors use the 32-bit header layout. */
		return (BPF_WORDALIGN32(linkhdr + SIZEOF_BPF_HDR32) - linkhdr);
	}
#endif
	return (BPF_WORDALIGN(linkhdr + SIZEOF_BPF_HDR) - linkhdr);
}
/*
* Move the packet data from interface memory (pkt) into the
* store buffer. Call the wakeup functions if it's time to wake up
* a listener (buffer full), "cpfn" is the routine called to do the
* actual data transfer. memcpy is passed in to copy contiguous chunks,
* while bpf_mcpy is passed in to copy mbuf chains. In the latter case,
* pkt is really an mbuf.
*/
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
void *(*cpfn)(void *, const void *, size_t), struct timespec *ts)
{
char *h;
int totlen, curlen, caplen;
int hdrlen = bpf_hdrlen(d);
int do_wakeup = 0;
atomic_inc_ulong(&d->bd_ccount);
BPF_STATINC(capt);
/*
* Figure out how many bytes to move. If the packet is
* greater or equal to the snapshot length, transfer that
* much. Otherwise, transfer the whole packet (unless
* we hit the buffer size limit).
*/
totlen = hdrlen + uimin(snaplen, pktlen);
if (totlen > d->bd_bufsize)
totlen = d->bd_bufsize;
/*
* If we adjusted totlen to fit the bufsize, it could be that
* totlen is smaller than hdrlen because of the link layer header.
*/
caplen = totlen - hdrlen;
if (caplen < 0)
caplen = 0;
mutex_enter(d->bd_buf_mtx);
/*
* Round up the end of the previous packet to the next longword.
*/
#ifdef _LP64
if (d->bd_compat32)
curlen = BPF_WORDALIGN32(d->bd_slen);
else
#endif
curlen = BPF_WORDALIGN(d->bd_slen);
if (curlen + totlen > d->bd_bufsize) {
/*
* This packet will overflow the storage buffer.
* Rotate the buffers if we can, then wakeup any
* pending reads.
*/
if (d->bd_fbuf == NULL) {
mutex_exit(d->bd_buf_mtx);
/*
* We haven't completed the previous read yet,
* so drop the packet.
*/
atomic_inc_ulong(&d->bd_dcount);
BPF_STATINC(drop);
return;
}
ROTATE_BUFFERS(d);
do_wakeup = 1;
curlen = 0;
} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
/*
* Immediate mode is set, or the read timeout has
* already expired during a select call. A packet
* arrived, so the reader should be woken up.
*/
do_wakeup = 1;
}
/*
* Append the bpf header.
*/
h = (char *)d->bd_sbuf + curlen;
#ifdef _LP64
if (d->bd_compat32) {
struct bpf_hdr32 *hp32;
/*
* Copy the packet data into the store buffer and update its length.
*/
(*cpfn)(h + hdrlen, pkt, caplen);
d->bd_slen = curlen + totlen;
/*
* Call bpf_wakeup after bd_slen has been updated so that kevent(2)
* will cause filt_bpfread() to be called with it adjusted.
*/
if (do_wakeup)
bpf_wakeup(d);
mutex_exit(d->bd_buf_mtx);
}
/*
* Initialize all nonzero fields of a descriptor.
*/
static int
bpf_allocbufs(struct bpf_d *d)
{
if (filter->bf_insn != NULL)
kmem_free(filter->bf_insn, filter->bf_size);
if (filter->bf_jitcode != NULL)
bpf_jit_freecode(filter->bf_jitcode);
kmem_free(filter, sizeof(*filter));
}
/*
 * Release the packet buffers and filters held by descriptor d.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it yet hasn't been marked
	 * free.
	 */
	if (d->bd_sbuf != NULL) {
		/*
		 * The hold and free buffers are only looked at when a store
		 * buffer exists; each buffer is bd_bufsize bytes.
		 */
		void *const bufs[] = { d->bd_sbuf, d->bd_hbuf, d->bd_fbuf };
		size_t i;

		for (i = 0; i < sizeof(bufs) / sizeof(bufs[0]); i++) {
			if (bufs[i] != NULL)
				kmem_free(bufs[i], d->bd_bufsize);
		}
	}

	/* Drop the read filter, if any. */
	if (d->bd_rfilter != NULL) {
		bpf_free_filter(d->bd_rfilter);
		d->bd_rfilter = NULL;
	}

	/* Drop the write filter, if any. */
	if (d->bd_wfilter != NULL) {
		bpf_free_filter(d->bd_wfilter);
		d->bd_wfilter = NULL;
	}

	d->bd_jitcode = NULL;
}
/*
* Attach an interface to bpf. dlt is the link layer type;
* hdrlen is the fixed size of the link header for the specified dlt
* (variable length headers not yet supported).
*/
static void
_bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{
struct bpf_if *bp;
if (bp == NULL)
panic("%s: no bpf_if found for %s", __func__, ifp->if_xname);
}
/*
 * Remove an interface from bpf.
 *
 * Every descriptor attached to ifp is detached (it is freed later by the
 * close routine).  Then every bpf_if that belongs to ifp is unlinked from
 * bpf_iflist and freed, together with any mbufs still queued on it and
 * its soft interrupt handler.  Runs with bpf_mtx held throughout.
 */
static void
_bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp;
	struct bpf_d *d;
	int s;

	mutex_enter(&bpf_mtx);
	/* Nuke the vnodes for any open instances */
  again_d:
	BPF_DLIST_WRITER_FOREACH(d) {
		mutex_enter(d->bd_mtx);
		if (d->bd_bif != NULL && d->bd_bif->bif_ifp == ifp) {
			/*
			 * Detach the descriptor from an interface now.
			 * It will be free'ed later by close routine.
			 */
			bpf_detachd(d);
			mutex_exit(d->bd_mtx);
			/* Rescan from the head after each detach. */
			goto again_d;
		}
		mutex_exit(d->bd_mtx);
	}

  again:
	BPF_IFLIST_WRITER_FOREACH(bp) {
		if (bp->bif_ifp == ifp) {
			BPF_IFLIST_WRITER_REMOVE(bp);
			/*
			 * bpf_iflist is also walked by pserialize readers.
			 * Wait for them to leave their read sections before
			 * the entry is destroyed and freed; bpf_mtx is held,
			 * as pserialize_perform requires.
			 * NOTE(review): holders of a psref on bp (see
			 * bpf_if_release users) are not waited for here;
			 * confirm whether a psref target destroy is needed.
			 */
			pserialize_perform(bpf_psz);
			BPF_IFLIST_ENTRY_DESTROY(bp);
			if (bp->bif_si != NULL) {
				/* XXX NOMPSAFE: assumed running on one CPU */
				s = splnet();
				/* Drain mbufs still queued for the softint. */
				while (bp->bif_mbuf_head != NULL) {
					struct mbuf *m = bp->bif_mbuf_head;
					bp->bif_mbuf_head = m->m_nextpkt;
					m_freem(m);
				}
				splx(s);
				softint_disestablish(bp->bif_si);
			}
			kmem_free(bp, sizeof(*bp));
			/* The list changed under us; rescan from the head. */
			goto again;
		}
	}
	mutex_exit(&bpf_mtx);
}
/*
 * Change the data link type of an interface.
 *
 * Finds the bpf_if whose driver pointer is &ifp->if_bpf and stores the
 * new dlt and link header length in it, under bpf_mtx.  Panics if no
 * such entry is on bpf_iflist.
 */
static void
_bpf_change_type(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	mutex_enter(&bpf_mtx);
	BPF_IFLIST_WRITER_FOREACH(bp) {
		if (bp->bif_driverp != &ifp->if_bpf)
			continue;

		/* Found the interface's entry: update it and finish. */
		bp->bif_dlt = dlt;
		bp->bif_hdrlen = hdrlen;
		mutex_exit(&bpf_mtx);
		return;
	}

	/* Walked the whole list without a match. */
	panic("bpf_change_type");
}
/*
 * Get a list of available data link types of the interface that
 * descriptor d is attached to.
 *
 * d	descriptor; the caller holds d->bd_mtx and d->bd_bif is set
 * bfl	request/result.  If bfl->bfl_list is NULL the entries are only
 *	counted.  Otherwise each entry's dlt is copied out to
 *	bfl->bfl_list[n], and bfl->bfl_len on entry is the capacity of
 *	that array.  On normal return bfl->bfl_len holds the number of
 *	matching entries.
 *
 * Returns ENOMEM if bfl->bfl_list is too small (bfl->bfl_len is left
 * untouched in that case), otherwise the result of the last copyout(),
 * or 0 if none was done.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;
	int s, bound;

	KASSERT(mutex_owned(d->bd_mtx));

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;

	bound = curlwp_bind();
	s = pserialize_read_enter();
	BPF_IFLIST_READER_FOREACH(bp) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			struct psref psref;

			if (n >= bfl->bfl_len) {
				pserialize_read_exit(s);
				/* Undo curlwp_bind() on this early exit too. */
				curlwp_bindx(bound);
				return ENOMEM;
			}
			/*
			 * NOTE(review): bp's psref is released below and the
			 * read section is re-entered, but no matching acquire
			 * or pserialize_read_exit() before copyout() is
			 * visible here; confirm against the complete file.
			 */
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
			s = pserialize_read_enter();
			bpf_if_release(bp, &psref);
		}
		n++;
	}
	pserialize_read_exit(s);
	curlwp_bindx(bound);

	bfl->bfl_len = n;
	return error;
}
/*
* Set the data link type of a BPF instance.
*/
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
int error, opromisc;
struct ifnet *ifp;
struct bpf_if *bp;
case MODULE_CMD_FINI:
/*
* While there is no reference counting for bpf callers,
* unload could at least in theory be done similarly to
* system call disestablishment. This should even be
* a little simpler:
*
* 1) replace op vector with stubs
* 2) post update to all cpus with xc
* 3) check that nobody is in bpf anymore
* (it's doubtful we'd want something like l_sysent,
* but we could do something like *signed* percpu
* counters. if the sum is 0, we're good).
* 4) if fail, unroll changes
*
* NOTE: change won't be atomic to the outside. some
* packets may be not captured even if unload is
* not successful. I think packet capture not working
* is a perfectly logical consequence of trying to
* disable packet capture.
*/
error = EOPNOTSUPP;
break;