/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifdef O_DIRECT_SUPPORT
# ifndef O_DIRECT
# error O_DIRECT support configured but O_DIRECT definition not found in headers
# endif
#endif
static DM_LIST_INIT(_open_devices);
/*-----------------------------------------------------------------
* The standard io loop that keeps submitting an io until it's
* all gone.
*---------------------------------------------------------------*/
static int _io(struct device_area *where, void *buffer, int should_write)
{
int fd = dev_fd(where->dev);
ssize_t n = 0;
size_t total = 0;
if (fd < 0) {
log_error("Attempt to read an unopened device (%s).",
dev_name(where->dev));
return 0;
}
/*
* Skip all writes in test mode.
*/
if (should_write && test_mode())
return 1;
if (where->size > SSIZE_MAX) {
log_error("Read size too large: %" PRIu64, where->size);
return 0;
}
while (total < (size_t) where->size) {
do
n = should_write ?
write(fd, buffer, (size_t) where->size - total) :
read(fd, buffer, (size_t) where->size - total);
while ((n < 0) && ((errno == EINTR) || (errno == EAGAIN)));
if (n < 0)
log_error("%s: %s failed after %" PRIu64 " of %" PRIu64
" at %" PRIu64 ": %s", dev_name(where->dev),
should_write ? "write" : "read",
(uint64_t) total,
(uint64_t) where->size,
(uint64_t) where->start, strerror(errno));
if (n <= 0)
break;
total += n;
buffer += n;
}
return (total == (size_t) where->size);
}
/*-----------------------------------------------------------------
* LVM2 uses O_DIRECT when performing metadata io, which requires
* block size aligned accesses. If any io is not aligned we have
* to perform the io via a bounce buffer, obviously this is quite
* inefficient.
*---------------------------------------------------------------*/
/*
* Get the sector size from an _open_ device.
*/
static int _get_block_size(struct device *dev, unsigned int *size)
{
const char *name = dev_name(dev);
#ifdef __NetBSD__
struct disklabel lab;
prop_dictionary_t disk_dict, geom_dict;
uint32_t secsize;
#endif
/*
* Widens a region to be an aligned region.
*/
static void _widen_region(unsigned int block_size, struct device_area *region,
struct device_area *result)
{
uint64_t mask = block_size - 1, delta;
memcpy(result, region, sizeof(*result));
/* adjust the start */
delta = result->start & mask;
if (delta) {
result->start -= delta;
result->size += delta;
}
/* adjust the end */
delta = (result->start + result->size) & mask;
if (delta)
result->size += block_size - delta;
}
static int _aligned_io(struct device_area *where, void *buffer,
int should_write)
{
void *bounce;
unsigned int block_size = 0;
uintptr_t mask;
struct device_area widened;
if (!(where->dev->flags & DEV_REGULAR) &&
!_get_block_size(where->dev, &block_size))
return_0;
if (!block_size)
block_size = lvm_getpagesize();
_widen_region(block_size, where, &widened);
/* Do we need to use a bounce buffer? */
mask = block_size - 1;
if (!memcmp(where, &widened, sizeof(widened)) &&
!((uintptr_t) buffer & mask))
return _io(where, buffer, should_write);
/* Allocate a bounce buffer with an extra block */
if (!(bounce = alloca((size_t) widened.size + block_size))) {
log_error("Bounce buffer alloca failed");
return 0;
}
/*
* Realign start of bounce buffer (using the extra sector)
*/
if (((uintptr_t) bounce) & mask)
bounce = (void *) ((((uintptr_t) bounce) + mask) & ~mask);
/* channel the io through the bounce buffer */
if (!_io(&widened, bounce, 0)) {
if (!should_write)
return_0;
/* FIXME pre-extend the file */
memset(bounce, '\n', widened.size);
}
#ifdef __NetBSD__
/* Get info about partition/wedge */
if (ioctl(fd, DIOCGWEDGEINFO, &dkw) == -1) {
if (ioctl(fd, DIOCGDINFO, &lab) == -1) {
log_debug("Please implement DIOCGWEDGEINFO or "
"DIOCGDINFO for disk device %s", name);
close(fd);
return 0;
} else {
if (fstat(fd, &stat) < 0)
log_debug("fstat on device %s failure", name);
/*-----------------------------------------------------------------
* Public functions
*---------------------------------------------------------------*/
int dev_get_size(const struct device *dev, uint64_t *size)
{
if (!dev)
return 0;
if (dev->open_count && !need_excl) {
/* FIXME Ensure we never get here */
log_debug("WARNING: %s already opened read-only",
dev_name(dev));
dev->open_count++;
}
dev_close_immediate(dev);
}
if (memlock())
log_error("WARNING: dev_open(%s) called while suspended",
dev_name(dev));
if (dev->flags & DEV_REGULAR)
name = dev_name(dev);
else if (!(name = dev_name_confirmed(dev, quiet)))
return_0;
if (!(dev->flags & DEV_REGULAR)) {
if (stat(name, &buf) < 0) {
log_sys_error("%s: stat failed", name);
return 0;
}
if (buf.st_rdev != dev->dev) {
log_error("%s: device changed", name);
return 0;
}
}
#ifdef O_DIRECT_SUPPORT
if (direct) {
if (!(dev->flags & DEV_O_DIRECT_TESTED))
dev->flags |= DEV_O_DIRECT;
if ((dev->flags & DEV_O_DIRECT))
flags |= O_DIRECT;
}
#endif
#ifdef O_NOATIME
/* Don't update atime on device inodes */
if (!(dev->flags & DEV_REGULAR))
flags |= O_NOATIME;
#endif
if ((dev->fd = open(name, flags, 0777)) < 0) {
#ifdef O_DIRECT_SUPPORT
if (direct && !(dev->flags & DEV_O_DIRECT_TESTED)) {
flags &= ~O_DIRECT;
if ((dev->fd = open(name, flags, 0777)) >= 0) {
dev->flags &= ~DEV_O_DIRECT;
log_debug("%s: Not using O_DIRECT", name);
goto opened;
}
}
#endif
if (quiet)
log_sys_debug("open", name);
else
log_sys_error("open", name);
static int _dev_close(struct device *dev, int immediate)
{
struct lvmcache_info *info;
if (dev->fd < 0) {
log_error("Attempt to close device '%s' "
"which is not open.", dev_name(dev));
return 0;
}
#ifndef O_DIRECT_SUPPORT
if (dev->flags & DEV_ACCESSED_W)
dev_flush(dev);
#endif
if (dev->open_count > 0)
dev->open_count--;
if (immediate && dev->open_count)
log_debug("%s: Immediate close attempt while still referenced",
dev_name(dev));
/* Close unless device is known to belong to a locked VG */
if (immediate ||
(dev->open_count < 1 &&
(!(info = info_from_pvid(dev->pvid, 0)) ||
!info->vginfo ||
!vgname_is_locked(info->vginfo->vgname))))
_close(dev);
return 1;
}
int dev_close(struct device *dev)
{
return _dev_close(dev, 0);
}
int dev_close_immediate(struct device *dev)
{
return _dev_close(dev, 1);
}
/*
* Read from 'dev' into 'buf', possibly in 2 distinct regions, denoted
* by (offset,len) and (offset2,len2). Thus, the total size of
* 'buf' should be len+len2.
*/
int dev_read_circular(struct device *dev, uint64_t offset, size_t len,
uint64_t offset2, size_t len2, void *buf)
{
if (!dev_read(dev, offset, len, buf)) {
log_error("Read from %s failed", dev_name(dev));
return 0;
}
/*
* The second region is optional, and allows for
* a circular buffer on the device.
*/
if (!len2)
return 1;
if (!dev_read(dev, offset2, len2, buf + len)) {
log_error("Circular read from %s failed",
dev_name(dev));
return 0;
}
return 1;
}
/* FIXME If O_DIRECT can't extend file, dev_extend first; dev_truncate after.
* But fails if concurrent processes writing
*/
/* FIXME pre-extend the file */
int dev_append(struct device *dev, size_t len, void *buffer)
{
int r;
if (!dev->open_count)
return_0;
r = dev_write(dev, dev->end, len, buffer);
dev->end += (uint64_t) len;