/* $NetBSD: kvm.c,v 1.111 2023/08/23 14:00:11 rin Exp $ */
/*-
* Copyright (c) 1989, 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software developed by the Computer Systems
* Engineering group at Lawrence Berkeley Laboratory under DARPA contract
* BG 91-66 and contributed to Berkeley.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Report an error using printf style arguments. "program" is kd->program
* on hard errors, and 0 on soft errors, so that under sun error emulation,
* only hard errors are printed out (otherwise, programs like gdb will
* generate tons of error messages when trying to access bogus pointers).
*/
void
_kvm_err(kvm_t *kd, const char *program, const char *fmt, ...)
{
va_list ap;
/*
* Wrapper around the lseek(2) system call; calls _kvm_syserr() for us
* in the event of emergency.
*/
static off_t
Lseek(kvm_t *kd, int fd, off_t offset, int whence)
{
off_t off;
/*
* Wrapper around the pread(2) system call; calls _kvm_syserr() for us
* in the event of emergency.
*/
static ssize_t
Pread(kvm_t *kd, int fd, void *buf, size_t nbytes, off_t offset)
{
ssize_t rv;
/*
* Open the kernel namelist. If /dev/ksyms doesn't
* exist, open the current kernel.
*/
if (ufgiven == 0)
kd->nlfd = open(_PATH_KSYMS, O_RDONLY | O_CLOEXEC, 0);
if (kd->nlfd < 0) {
if ((kd->nlfd = open(uf, O_RDONLY | O_CLOEXEC, 0)) < 0) {
_kvm_syserr(kd, kd->program, "%s", uf);
goto failed;
}
strlcpy(kd->kernelname, uf, sizeof(kd->kernelname));
} else {
strlcpy(kd->kernelname, _PATH_KSYMS, sizeof(kd->kernelname));
}
if ((kd->pmfd = open(mf, flag | O_CLOEXEC, 0)) < 0) {
_kvm_syserr(kd, kd->program, "%s", mf);
goto failed;
}
if (fstat(kd->pmfd, &st) < 0) {
_kvm_syserr(kd, kd->program, "%s", mf);
goto failed;
}
if (S_ISCHR(st.st_mode) && strcmp(mf, _PATH_MEM) == 0) {
/*
* If this is /dev/mem, open kmem too. (Maybe we should
* make it work for either /dev/mem or /dev/kmem -- in either
* case you're working with a live kernel.)
*/
if ((kd->vmfd = open(_PATH_KMEM, flag | O_CLOEXEC, 0)) < 0) {
_kvm_syserr(kd, kd->program, "%s", _PATH_KMEM);
goto failed;
}
kd->alive = KVM_ALIVE_FILES;
if ((kd->swfd = open(sf, flag | O_CLOEXEC, 0)) < 0) {
if (errno != ENXIO) {
_kvm_syserr(kd, kd->program, "%s", sf);
goto failed;
}
/* swap is not configured? not fatal */
}
} else {
if (S_ISCHR(st.st_mode)) {
kd->fdalign = DEV_BSIZE;
} else {
kd->fdalign = 1;
}
/*
* This is a crash dump.
* Initialize the virtual address translation machinery.
*
* If there is no valid core header, fail silently here.
* Address translations will, however, fail without a
* header. Things can be made to run by calling
* kvm_dump_mkheader() before doing any translation.
*/
if (_kvm_get_header(kd) == 0) {
if (_kvm_initvtop(kd) < 0)
goto failed;
}
kd->dump_size = (size_t)st.st_size;
kd->dump_mem = mmap(NULL, kd->dump_size, PROT_READ|PROT_WRITE,
MAP_FILE|MAP_PRIVATE, kd->pmfd, 0);
}
return (kd);
failed:
/*
* Copy out the error if doing sane error semantics.
*/
if (errout != 0)
(void)strlcpy(errout, kd->errbuf, _POSIX2_LINE_MAX);
(void)kvm_close(kd);
return (0);
}
/*
* The kernel dump file (from savecore) contains:
* kcore_hdr_t kcore_hdr;
* kcore_seg_t cpu_hdr;
* (opaque) cpu_data; (size is cpu_hdr.c_size)
* kcore_seg_t mem_hdr;
* (memory) mem_data; (size is mem_hdr.c_size)
*
* Note: kcore_hdr is padded to kcore_hdr.c_hdrsize;
* cpu_hdr and mem_hdr are padded to kcore_hdr.c_seghdrsize
*/
static int
_kvm_get_header(kvm_t *kd)
{
kcore_hdr_t kcore_hdr;
kcore_seg_t cpu_hdr;
kcore_seg_t mem_hdr;
size_t offset;
ssize_t sz;
/*
* Read the kcore_hdr_t
*/
sz = Pread(kd, kd->pmfd, &kcore_hdr, sizeof(kcore_hdr), (off_t)0);
if (sz != sizeof(kcore_hdr))
return (-1);
/*
* Currently, we only support dump-files made by the current
* architecture...
*/
if ((CORE_GETMAGIC(kcore_hdr) != KCORE_MAGIC) ||
(CORE_GETMID(kcore_hdr) != MID_MACHINE))
return (-1);
/*
* Currently, we only support exactly 2 segments: cpu-segment
* and data-segment in exactly that order.
*/
if (kcore_hdr.c_nseg != 2)
return (-1);
/*
* Save away the kcore_hdr. All errors after this
* should do a "goto fail" to deallocate things.
*/
kd->kcore_hdr = _kvm_malloc(kd, sizeof(kcore_hdr));
memcpy(kd->kcore_hdr, &kcore_hdr, sizeof(kcore_hdr));
offset = kcore_hdr.c_hdrsize;
/*
* Read the CPU segment header
*/
sz = Pread(kd, kd->pmfd, &cpu_hdr, sizeof(cpu_hdr), (off_t)offset);
if (sz != sizeof(cpu_hdr))
goto fail;
if ((CORE_GETMAGIC(cpu_hdr) != KCORESEG_MAGIC) ||
(CORE_GETFLAG(cpu_hdr) != CORE_CPU))
goto fail;
offset += kcore_hdr.c_seghdrsize;
/*
* Read the CPU segment DATA.
*/
kd->cpu_dsize = cpu_hdr.c_size;
kd->cpu_data = _kvm_malloc(kd, cpu_hdr.c_size);
if (kd->cpu_data == NULL)
goto fail;
sz = Pread(kd, kd->pmfd, kd->cpu_data, cpu_hdr.c_size, (off_t)offset);
if (sz != cpu_hdr.c_size)
goto fail;
offset += cpu_hdr.c_size;
/*
* Read the next segment header: data segment
*/
sz = Pread(kd, kd->pmfd, &mem_hdr, sizeof(mem_hdr), (off_t)offset);
if (sz != sizeof(mem_hdr))
goto fail;
offset += kcore_hdr.c_seghdrsize;
if ((CORE_GETMAGIC(mem_hdr) != KCORESEG_MAGIC) ||
(CORE_GETFLAG(mem_hdr) != CORE_DATA))
goto fail;
/*
* The format while on the dump device is: (new format)
* kcore_seg_t cpu_hdr;
* (opaque) cpu_data; (size is cpu_hdr.c_size)
* kcore_seg_t mem_hdr;
* (memory) mem_data; (size is mem_hdr.c_size)
*/
int
kvm_dump_mkheader(kvm_t *kd, off_t dump_off)
{
kcore_seg_t cpu_hdr;
size_t hdr_size;
ssize_t sz;
if (kd->kcore_hdr != NULL) {
_kvm_err(kd, kd->program, "already has a dump header");
return (-1);
}
if (ISALIVE(kd)) {
_kvm_err(kd, kd->program, "don't use on live kernel");
return (-1);
}
/*
* Validate new format crash dump
*/
sz = Pread(kd, kd->pmfd, &cpu_hdr, sizeof(cpu_hdr), dump_off);
if (sz != sizeof(cpu_hdr)) {
if (sz == -1)
_kvm_err(kd, 0, "read %zx bytes at offset %"PRIx64
" for cpu_hdr failed: %s", sizeof(cpu_hdr),
dump_off, strerror(errno));
else
_kvm_err(kd, 0, "read %zx bytes at offset %"PRIx64
" for cpu_hdr instead of requested %zu",
sz, dump_off, sizeof(cpu_hdr));
return (-1);
}
if ((CORE_GETMAGIC(cpu_hdr) != KCORE_MAGIC)
|| (CORE_GETMID(cpu_hdr) != MID_MACHINE)) {
_kvm_err(kd, 0, "invalid magic in cpu_hdr");
return (0);
}
hdr_size = ALIGN(sizeof(cpu_hdr));
while (size > 0) {
len = size > sizeof(buf) ? sizeof(buf) : size;
if (!(*write_buf)(cookie, buf, len)) {
_kvm_syserr(kd, kd->program, "clear_gap");
return -1;
}
size -= len;
}
return 0;
}
/*
* Write the dump header by calling write_buf with cookie as first argument.
*/
int
kvm_dump_header(kvm_t *kd, bool (*write_buf)(void *, const void *, size_t),
void *cookie, int dumpsize)
{
kcore_seg_t seghdr;
long offset;
size_t gap;
if (kd->kcore_hdr == NULL || kd->cpu_data == NULL) {
_kvm_err(kd, kd->program, "no valid dump header(s)");
return (-1);
}
/*
* Write the generic header
*/
offset = 0;
if (!(*write_buf)(cookie, kd->kcore_hdr, sizeof(kcore_hdr_t))) {
_kvm_syserr(kd, kd->program, "kvm_dump_header");
return (-1);
}
offset += kd->kcore_hdr->c_hdrsize;
gap = kd->kcore_hdr->c_hdrsize - sizeof(kcore_hdr_t);
if (clear_gap(kd, write_buf, cookie, gap) == -1)
return (-1);
/*
* Write the CPU header
*/
CORE_SETMAGIC(seghdr, KCORESEG_MAGIC, 0, CORE_CPU);
seghdr.c_size = ALIGN(kd->cpu_dsize);
if (!(*write_buf)(cookie, &seghdr, sizeof(seghdr))) {
_kvm_syserr(kd, kd->program, "kvm_dump_header");
return (-1);
}
offset += kd->kcore_hdr->c_seghdrsize;
gap = kd->kcore_hdr->c_seghdrsize - sizeof(seghdr);
if (clear_gap(kd, write_buf, cookie, gap) == -1)
return (-1);
if (!(*write_buf)(cookie, kd->cpu_data, kd->cpu_dsize)) {
_kvm_syserr(kd, kd->program, "kvm_dump_header");
return (-1);
}
offset += seghdr.c_size;
gap = seghdr.c_size - kd->cpu_dsize;
if (clear_gap(kd, write_buf, cookie, gap) == -1)
return (-1);
/*
* Write the actual dump data segment header
*/
CORE_SETMAGIC(seghdr, KCORESEG_MAGIC, 0, CORE_DATA);
seghdr.c_size = dumpsize;
if (!(*write_buf)(cookie, &seghdr, sizeof(seghdr))) {
_kvm_syserr(kd, kd->program, "kvm_dump_header");
return (-1);
}
offset += kd->kcore_hdr->c_seghdrsize;
gap = kd->kcore_hdr->c_seghdrsize - sizeof(seghdr);
if (clear_gap(kd, write_buf, cookie, gap) == -1)
return (-1);
if (ISKMEM(kd)) {
/*
* We're using /dev/kmem. Just read straight from the
* device and let the active kernel do the address translation.
*/
errno = 0;
cc = _kvm_pread(kd, kd->vmfd, buf, len, (off_t)kva);
if (cc < 0) {
_kvm_syserr(kd, 0, "kvm_read");
return (-1);
} else if (cc < len)
_kvm_err(kd, kd->program, "short read");
return (cc);
} else if (ISSYSCTL(kd)) {
_kvm_err(kd, kd->program, "kvm_open called with KVM_NO_FILES, "
"can't use kvm_read");
return (-1);
} else {
if ((kd->kcore_hdr == NULL) || (kd->cpu_data == NULL)) {
_kvm_err(kd, kd->program, "no valid dump header");
return (-1);
}
cp = buf;
while (len > 0) {
paddr_t pa;
off_t foff;
cc = _kvm_kvatop(kd, (vaddr_t)kva, &pa);
if (cc == 0) {
_kvm_err(kd, kd->program, "_kvm_kvatop(%lx)", kva);
return (-1);
}
if (cc > len)
cc = len;
foff = _kvm_pa2off(kd, pa);
errno = 0;
cc = _kvm_pread(kd, kd->pmfd, cp, (size_t)cc, foff);
if (cc < 0) {
_kvm_syserr(kd, kd->program, "kvm_read");
break;
}
/*
* If _kvm_kvatop returns a bogus value or our core
* file is truncated, we might wind up seeking beyond
* the end of the core file in which case the read will
* return 0 (EOF).
*/
if (cc == 0)
break;
cp = (char *)cp + cc;
kva += cc;
len -= cc;
}
return ((char *)cp - (char *)buf);
}
/* NOTREACHED */
}
if (ISKMEM(kd)) {
/*
* Just like kvm_read, only we write.
*/
errno = 0;
cc = pwrite(kd->vmfd, buf, len, (off_t)kva);
if (cc < 0) {
_kvm_syserr(kd, 0, "kvm_write");
return (-1);
} else if (cc < len)
_kvm_err(kd, kd->program, "short write");
return (cc);
} else if (ISSYSCTL(kd)) {
_kvm_err(kd, kd->program, "kvm_open called with KVM_NO_FILES, "
"can't use kvm_write");
return (-1);
} else {
if (kd->dump_mem == MAP_FAILED) {
_kvm_err(kd, kd->program,
"kvm_write not implemented for dead kernels");
return (-1);
}
cp = buf;
while (len > 0) {
paddr_t pa;
off_t foff;
cc = _kvm_kvatop(kd, (vaddr_t)kva, &pa);
if (cc == 0) {
_kvm_err(kd, kd->program, "_kvm_kvatop(%lx)", kva);
return (-1);
}
if (cc > len)
cc = len;
foff = _kvm_pa2off(kd, pa);
errno = 0;
cc = _kvm_pwrite(kd, cp, (size_t)cc, foff);
if (cc < 0) {
_kvm_syserr(kd, kd->program, "kvm_pwrite");
break;
}
/*
* If _kvm_kvatop returns a bogus value or our core
* file is truncated, we might wind up seeking beyond
* the end of the core file in which case the write will
* return 0.
*/
if (cc == 0)
break;
cp = (const char *)cp + cc;
kva += cc;
len -= cc;
}
return ((const char *)cp - (const char *)buf);
}
/* NOTREACHED */
}