/*      $NetBSD: locking.c,v 1.1.1.3 2009/12/02 00:26:25 haad Exp $     */

/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include "lib.h"
#include "locking.h"
#include "locking_types.h"
#include "lvm-string.h"
#include "activate.h"
#include "toolcontext.h"
#include "memlock.h"
#include "defaults.h"
#include "lvmcache.h"

#include <assert.h>
#include <signal.h>
#include <sys/stat.h>
#include <limits.h>
#include <unistd.h>

static struct locking_type _locking;
static sigset_t _oldset;

static int _vg_lock_count = 0;          /* Number of locks held */
static int _vg_write_lock_held = 0;     /* VG write lock held? */
static int _signals_blocked = 0;
static int _blocking_supported = 0;

static volatile sig_atomic_t _sigint_caught = 0;
static volatile sig_atomic_t _handler_installed;
static struct sigaction _oldhandler;
static int _oldmasked;

typedef enum {
       LV_NOOP,
       LV_SUSPEND,
       LV_RESUME
} lv_operation_t;

static void _catch_sigint(int unused __attribute__((unused)))
{
       _sigint_caught = 1;
}

int sigint_caught(void) {
       return _sigint_caught;
}

void sigint_clear(void)
{
       _sigint_caught = 0;
}

/*
* Temporarily allow keyboard interrupts to be intercepted and noted;
* saves interrupt handler state for sigint_restore().  Users should
* use the sigint_caught() predicate to check whether interrupt was
* requested and act appropriately.  Interrupt flags are never
* cleared automatically by this code, but the tools clear the flag
* before running each command in lvm_run_command().  All other places
* where the flag needs to be cleared need to call sigint_clear().
*/

void sigint_allow(void)
{
       struct sigaction handler;
       sigset_t sigs;

       /*
        * Do not overwrite the backed-up handler data -
        * just increase nesting count.
        */
       if (_handler_installed) {
               _handler_installed++;
               return;
       }

       /* Grab old sigaction for SIGINT: shall not fail. */
       sigaction(SIGINT, NULL, &handler);
       handler.sa_flags &= ~SA_RESTART; /* Clear restart flag */
       handler.sa_handler = _catch_sigint;

       _handler_installed = 1;

       /* Override the signal handler: shall not fail. */
       sigaction(SIGINT, &handler, &_oldhandler);

       /* Unmask SIGINT.  Remember to mask it again on restore. */
       sigprocmask(0, NULL, &sigs);
       if ((_oldmasked = sigismember(&sigs, SIGINT))) {
               sigdelset(&sigs, SIGINT);
               sigprocmask(SIG_SETMASK, &sigs, NULL);
       }
}

void sigint_restore(void)
{
       if (!_handler_installed)
               return;

       if (_handler_installed > 1) {
               _handler_installed--;
               return;
       }

       /* Nesting count went down to 0. */
       _handler_installed = 0;

       if (_oldmasked) {
               sigset_t sigs;
               sigprocmask(0, NULL, &sigs);
               sigaddset(&sigs, SIGINT);
               sigprocmask(SIG_SETMASK, &sigs, NULL);
       }

       sigaction(SIGINT, &_oldhandler, NULL);
}

static void _block_signals(uint32_t flags __attribute((unused)))
{
       sigset_t set;

       if (_signals_blocked)
               return;

       if (sigfillset(&set)) {
               log_sys_error("sigfillset", "_block_signals");
               return;
       }

       if (sigprocmask(SIG_SETMASK, &set, &_oldset)) {
               log_sys_error("sigprocmask", "_block_signals");
               return;
       }

       _signals_blocked = 1;

       return;
}

static void _unblock_signals(void)
{
       /* Don't unblock signals while any locks are held */
       if (!_signals_blocked || _vg_lock_count)
               return;

       if (sigprocmask(SIG_SETMASK, &_oldset, NULL)) {
               log_sys_error("sigprocmask", "_block_signals");
               return;
       }

       _signals_blocked = 0;

       return;
}

static void _lock_memory(lv_operation_t lv_op)
{
       if (!(_locking.flags & LCK_PRE_MEMLOCK))
               return;

       if (lv_op == LV_SUSPEND)
               memlock_inc();
}

static void _unlock_memory(lv_operation_t lv_op)
{
       if (!(_locking.flags & LCK_PRE_MEMLOCK))
               return;

       if (lv_op == LV_RESUME)
               memlock_dec();
}

void reset_locking(void)
{
       int was_locked = _vg_lock_count;

       _vg_lock_count = 0;
       _vg_write_lock_held = 0;

       _locking.reset_locking();

       if (was_locked)
               _unblock_signals();
}

static void _update_vg_lock_count(const char *resource, uint32_t flags)
{
       /* Ignore locks not associated with updating VG metadata */
       if ((flags & LCK_SCOPE_MASK) != LCK_VG ||
           (flags & LCK_CACHE) ||
           !strcmp(resource, VG_GLOBAL))
               return;

       if ((flags & LCK_TYPE_MASK) == LCK_UNLOCK)
               _vg_lock_count--;
       else
               _vg_lock_count++;

       /* We don't bother to reset this until all VG locks are dropped */
       if ((flags & LCK_TYPE_MASK) == LCK_WRITE)
               _vg_write_lock_held = 1;
       else if (!_vg_lock_count)
               _vg_write_lock_held = 0;
}

/*
* Select a locking type
* type: locking type; if < 0, then read config tree value
*/
int init_locking(int type, struct cmd_context *cmd)
{
       if (type < 0)
               type = find_config_tree_int(cmd, "global/locking_type", 1);

       _blocking_supported = find_config_tree_int(cmd,
           "global/wait_for_locks", DEFAULT_WAIT_FOR_LOCKS);

       switch (type) {
       case 0:
               init_no_locking(&_locking, cmd);
               log_warn("WARNING: Locking disabled. Be careful! "
                         "This could corrupt your metadata.");
               return 1;

       case 1:
               log_very_verbose("%sFile-based locking selected.",
                                _blocking_supported ? "" : "Non-blocking ");

               if (!init_file_locking(&_locking, cmd))
                       break;
               return 1;

#ifdef HAVE_LIBDL
       case 2:
               if (!is_static()) {
                       log_very_verbose("External locking selected.");
                       if (init_external_locking(&_locking, cmd))
                               return 1;
               }
               if (!find_config_tree_int(cmd, "locking/fallback_to_clustered_locking",
                           find_config_tree_int(cmd, "global/fallback_to_clustered_locking",
                                                DEFAULT_FALLBACK_TO_CLUSTERED_LOCKING)))
                       break;
#endif

#ifdef CLUSTER_LOCKING_INTERNAL
               log_very_verbose("Falling back to internal clustered locking.");
               /* Fall through */

       case 3:
               log_very_verbose("Cluster locking selected.");
               if (!init_cluster_locking(&_locking, cmd))
                       break;
               return 1;
#endif

       case 4:
               log_verbose("Read-only locking selected. "
                           "Only read operations permitted.");
               if (!init_readonly_locking(&_locking, cmd))
                       break;
               return 1;

       default:
               log_error("Unknown locking type requested.");
               return 0;
       }

       if ((type == 2 || type == 3) &&
           find_config_tree_int(cmd, "locking/fallback_to_local_locking",
                   find_config_tree_int(cmd, "global/fallback_to_local_locking",
                                        DEFAULT_FALLBACK_TO_LOCAL_LOCKING))) {
               log_warn("WARNING: Falling back to local file-based locking.");
               log_warn("Volume Groups with the clustered attribute will "
                         "be inaccessible.");
               if (init_file_locking(&_locking, cmd))
                       return 1;
       }

       if (!ignorelockingfailure())
               return 0;

       log_verbose("Locking disabled - only read operations permitted.");
       init_readonly_locking(&_locking, cmd);

       return 1;
}

void fin_locking(void)
{
       _locking.fin_locking();
}

/*
* Does the LVM1 driver know of this VG name?
*/
int check_lvm1_vg_inactive(struct cmd_context *cmd, const char *vgname)
{
       struct stat info;
       char path[PATH_MAX];

       /* We'll allow operations on orphans */
       if (is_orphan_vg(vgname))
               return 1;

       /* LVM1 is only present in 2.4 kernels. */
       if (strncmp(cmd->kernel_vsn, "2.4.", 4))
               return 1;

       if (dm_snprintf(path, sizeof(path), "%s/lvm/VGs/%s", cmd->proc_dir,
                        vgname) < 0) {
               log_error("LVM1 proc VG pathname too long for %s", vgname);
               return 0;
       }

       if (stat(path, &info) == 0) {
               log_error("%s exists: Is the original LVM driver using "
                         "this volume group?", path);
               return 0;
       } else if (errno != ENOENT && errno != ENOTDIR) {
               log_sys_error("stat", path);
               return 0;
       }

       return 1;
}

/*
* VG locking is by VG name.
* FIXME This should become VG uuid.
*/
static int _lock_vol(struct cmd_context *cmd, const char *resource,
                    uint32_t flags, lv_operation_t lv_op)
{
       int ret = 0;

       _block_signals(flags);
       _lock_memory(lv_op);

       assert(resource);

       if (!*resource) {
               log_error("Internal error: Use of P_orphans is deprecated.");
               return 0;
       }

       if (*resource == '#' && (flags & LCK_CACHE)) {
               log_error("Internal error: P_%s referenced", resource);
               return 0;
       }

       if ((ret = _locking.lock_resource(cmd, resource, flags))) {
               if ((flags & LCK_SCOPE_MASK) == LCK_VG &&
                   !(flags & LCK_CACHE)) {
                       if ((flags & LCK_TYPE_MASK) == LCK_UNLOCK)
                               lvmcache_unlock_vgname(resource);
                       else
                               lvmcache_lock_vgname(resource, (flags & LCK_TYPE_MASK)
                                                               == LCK_READ);
               }

               _update_vg_lock_count(resource, flags);
       }

       _unlock_memory(lv_op);
       _unblock_signals();

       return ret;
}

int lock_vol(struct cmd_context *cmd, const char *vol, uint32_t flags)
{
       char resource[258] __attribute((aligned(8)));
       lv_operation_t lv_op;

       switch (flags & (LCK_SCOPE_MASK | LCK_TYPE_MASK)) {
               case LCK_LV_SUSPEND:
                               lv_op = LV_SUSPEND;
                               break;
               case LCK_LV_RESUME:
                               lv_op = LV_RESUME;
                               break;
               default:        lv_op = LV_NOOP;
       }


       if (flags == LCK_NONE) {
               log_debug("Internal error: %s: LCK_NONE lock requested", vol);
               return 1;
       }

       switch (flags & LCK_SCOPE_MASK) {
       case LCK_VG:
               /*
                * Automatically set LCK_NONBLOCK if one or more VGs locked.
                * This will enforce correctness and prevent deadlocks rather
                * than relying on the caller to set the flag properly.
                */
               if (!_blocking_supported || vgs_locked())
                       flags |= LCK_NONBLOCK;

               if (vol[0] != '#' &&
                   ((flags & LCK_TYPE_MASK) != LCK_UNLOCK) &&
                   (!(flags & LCK_CACHE)) &&
                   !lvmcache_verify_lock_order(vol))
                       return 0;

               /* Lock VG to change on-disk metadata. */
               /* If LVM1 driver knows about the VG, it can't be accessed. */
               if (!check_lvm1_vg_inactive(cmd, vol))
                       return 0;
               break;
       case LCK_LV:
               /* All LV locks are non-blocking. */
               flags |= LCK_NONBLOCK;
               break;
       default:
               log_error("Unrecognised lock scope: %d",
                         flags & LCK_SCOPE_MASK);
               return 0;
       }

       strncpy(resource, vol, sizeof(resource));

       if (!_lock_vol(cmd, resource, flags, lv_op))
               return 0;

       /*
        * If a real lock was acquired (i.e. not LCK_CACHE),
        * perform an immediate unlock unless LCK_HOLD was requested.
        */
       if (!(flags & LCK_CACHE) && !(flags & LCK_HOLD) &&
           ((flags & LCK_TYPE_MASK) != LCK_UNLOCK)) {
               if (!_lock_vol(cmd, resource,
                              (flags & ~LCK_TYPE_MASK) | LCK_UNLOCK, lv_op))
                       return 0;
       }

       return 1;
}

/* Unlock list of LVs */
int resume_lvs(struct cmd_context *cmd, struct dm_list *lvs)
{
       struct lv_list *lvl;

       dm_list_iterate_items(lvl, lvs)
               resume_lv(cmd, lvl->lv);

       return 1;
}

/* Lock a list of LVs */
int suspend_lvs(struct cmd_context *cmd, struct dm_list *lvs)
{
       struct dm_list *lvh;
       struct lv_list *lvl;

       dm_list_iterate_items(lvl, lvs) {
               if (!suspend_lv(cmd, lvl->lv)) {
                       log_error("Failed to suspend %s", lvl->lv->name);
                       dm_list_uniterate(lvh, lvs, &lvl->list) {
                               lvl = dm_list_item(lvh, struct lv_list);
                               resume_lv(cmd, lvl->lv);
                       }

                       return 0;
               }
       }

       return 1;
}

/* Lock a list of LVs */
int activate_lvs(struct cmd_context *cmd, struct dm_list *lvs, unsigned exclusive)
{
       struct dm_list *lvh;
       struct lv_list *lvl;

       dm_list_iterate_items(lvl, lvs) {
               if (!exclusive) {
                       if (!activate_lv(cmd, lvl->lv)) {
                               log_error("Failed to activate %s", lvl->lv->name);
                               return 0;
                       }
               } else if (!activate_lv_excl(cmd, lvl->lv)) {
                       log_error("Failed to activate %s", lvl->lv->name);
                       dm_list_uniterate(lvh, lvs, &lvl->list) {
                               lvl = dm_list_item(lvh, struct lv_list);
                               activate_lv(cmd, lvl->lv);
                       }
                       return 0;
               }
       }

       return 1;
}

int vg_write_lock_held(void)
{
       return _vg_write_lock_held;
}

int locking_is_clustered(void)
{
       return (_locking.flags & LCK_CLUSTERED) ? 1 : 0;
}

int remote_lock_held(const char *vol)
{
       int mode = LCK_NULL;

       if (!locking_is_clustered())
               return 0;

       if (!_locking.query_resource)
               return -1;

       /*
        * If an error occured, expect that volume is active
        */
       if (!_locking.query_resource(vol, &mode)) {
               stack;
               return 1;
       }

       return mode == LCK_NULL ? 0 : 1;
}