/*      Id: compat_fts.c,v 1.17 2020/06/15 01:37:14 schwarze Exp        */
/*      $OpenBSD: fts.c,v 1.59 2019/06/28 13:32:41 deraadt Exp $        */

/*-
* Copyright (c) 1990, 1993, 1994
*      The Regents of the University of California.  All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
*    may be used to endorse or promote products derived from this software
*    without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"

#include <sys/stat.h>
#include <sys/types.h>

#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "compat_fts.h"

#define MAXIMUM(a, b)   (((a) > (b)) ? (a) : (b))

static FTSENT   *fts_alloc(FTS *, const char *, size_t);
static FTSENT   *fts_build(FTS *);
static void      fts_lfree(FTSENT *);
static void      fts_load(FTS *, FTSENT *);
static size_t    fts_maxarglen(char * const *);
static void      fts_padjust(FTS *, FTSENT *);
static int       fts_palloc(FTS *, size_t);
static FTSENT   *fts_sort(FTS *, FTSENT *, int);
static unsigned short    fts_stat(FTS *, FTSENT *);

typedef int (*qsort_compar_proto)(const void *, const void *);

#define ISDOT(a)        (a[0] == '.' && (!a[1] || (a[1] == '.' && !a[2])))
#ifndef O_CLOEXEC
#define O_CLOEXEC       0
#endif

#define CLR(opt)        (sp->fts_options &= ~(opt))
#define ISSET(opt)      (sp->fts_options & (opt))
#define SET(opt)        (sp->fts_options |= (opt))

FTS *
fts_open(char * const *argv, int options,
   int (*compar)(const FTSENT **, const FTSENT **))
{
       FTS *sp;
       FTSENT *p, *root;
       int nitems;
       FTSENT *parent, *prev;

       /* Options check. */
       if (options & ~FTS_OPTIONMASK) {
               errno = EINVAL;
               return (NULL);
       }

       /* At least one path must be specified. */
       if (*argv == NULL) {
               errno = EINVAL;
               return (NULL);
       }

       /* Allocate/initialize the stream */
       if ((sp = calloc(1, sizeof(FTS))) == NULL)
               return (NULL);
       sp->fts_compar = compar;
       sp->fts_options = options;

       /*
        * Start out with 1K of path space, and enough, in any case,
        * to hold the user's paths.
        */
       if (fts_palloc(sp, MAXIMUM(fts_maxarglen(argv), PATH_MAX)))
               goto mem1;

       /* Allocate/initialize root's parent. */
       if ((parent = fts_alloc(sp, "", 0)) == NULL)
               goto mem2;
       parent->fts_level = FTS_ROOTPARENTLEVEL;

       /* Allocate/initialize root(s). */
       for (root = prev = NULL, nitems = 0; *argv; ++argv, ++nitems) {
               if ((p = fts_alloc(sp, *argv, strlen(*argv))) == NULL)
                       goto mem3;
               p->fts_level = FTS_ROOTLEVEL;
               p->fts_parent = parent;
               p->fts_accpath = p->fts_name;
               p->fts_info = fts_stat(sp, p);

               /* Command-line "." and ".." are real directories. */
               if (p->fts_info == FTS_DOT)
                       p->fts_info = FTS_D;

               /*
                * If comparison routine supplied, traverse in sorted
                * order; otherwise traverse in the order specified.
                */
               if (compar) {
                       p->fts_link = root;
                       root = p;
               } else {
                       p->fts_link = NULL;
                       if (root == NULL)
                               root = p;
                       else
                               prev->fts_link = p;
                       prev = p;
               }
       }
       if (compar && nitems > 1)
               root = fts_sort(sp, root, nitems);

       /*
        * Allocate a dummy pointer and make fts_read think that we've just
        * finished the node before the root(s); set p->fts_info to FTS_INIT
        * so that everything about the "current" node is ignored.
        */
       if ((sp->fts_cur = fts_alloc(sp, "", 0)) == NULL)
               goto mem3;
       sp->fts_cur->fts_link = root;
       sp->fts_cur->fts_info = FTS_INIT;

       if (nitems == 0)
               free(parent);

       return (sp);

mem3:   fts_lfree(root);
       free(parent);
mem2:   free(sp->fts_path);
mem1:   free(sp);
       return (NULL);
}

static void
fts_load(FTS *sp, FTSENT *p)
{
       size_t len;
       char *cp;

       /*
        * Load the stream structure for the next traversal.  Since we don't
        * actually enter the directory until after the preorder visit, set
        * the fts_accpath field specially so the chdir gets done to the right
        * place and the user can access the first node.  From fts_open it's
        * known that the path will fit.
        */
       len = p->fts_pathlen = p->fts_namelen;
       memmove(sp->fts_path, p->fts_name, len + 1);
       if ((cp = strrchr(p->fts_name, '/')) && (cp != p->fts_name || cp[1])) {
               len = strlen(++cp);
               memmove(p->fts_name, cp, len + 1);
               p->fts_namelen = len;
       }
       p->fts_accpath = p->fts_path = sp->fts_path;
       sp->fts_dev = p->fts_dev;
}

int
fts_close(FTS *sp)
{
       FTSENT *freep, *p;

       /*
        * This still works if we haven't read anything -- the dummy structure
        * points to the root list, so we step through to the end of the root
        * list which has a valid parent pointer.
        */
       if (sp->fts_cur) {
               for (p = sp->fts_cur; p->fts_level >= FTS_ROOTLEVEL;) {
                       freep = p;
                       p = p->fts_link ? p->fts_link : p->fts_parent;
                       free(freep);
               }
               free(p);
       }

       /* Free up child linked list, sort array, path buffer, stream ptr.*/
       if (sp->fts_child)
               fts_lfree(sp->fts_child);
       free(sp->fts_array);
       free(sp->fts_path);
       free(sp);

       return (0);
}

/*
* Special case of "/" at the end of the path so that slashes aren't
* appended which would cause paths to be written as "....//foo".
*/
#define NAPPEND(p)                                                      \
       (p->fts_path[p->fts_pathlen - 1] == '/'                         \
           ? p->fts_pathlen - 1 : p->fts_pathlen)

FTSENT *
fts_read(FTS *sp)
{
       FTSENT *p, *tmp;
       int instr;
       char *t;

       /* If finished or unrecoverable error, return NULL. */
       if (sp->fts_cur == NULL || ISSET(FTS_STOP))
               return (NULL);

       /* Set current node pointer. */
       p = sp->fts_cur;

       /* Save and zero out user instructions. */
       instr = p->fts_instr;
       p->fts_instr = FTS_NOINSTR;

       /* Directory in pre-order. */
       if (p->fts_info == FTS_D) {
               /* If skipped or crossed mount point, do post-order visit. */
               if (instr == FTS_SKIP ||
                   (ISSET(FTS_XDEV) && p->fts_dev != sp->fts_dev)) {
                       if (sp->fts_child) {
                               fts_lfree(sp->fts_child);
                               sp->fts_child = NULL;
                       }
                       p->fts_info = FTS_DP;
                       return (p);
               }

               /*
                * If haven't read do so.  If the read fails, fts_build sets
                * FTS_STOP or the fts_info field of the node.
                */
               if (sp->fts_child) {
                       /* nothing */
               } else if ((sp->fts_child = fts_build(sp)) == NULL) {
                       if (ISSET(FTS_STOP))
                               return (NULL);
                       return (p);
               }
               p = sp->fts_child;
               sp->fts_child = NULL;
               goto name;
       }

       /* Move to the next node on this level. */
next:   tmp = p;
       if ((p = p->fts_link)) {
               free(tmp);

               /*
                * If reached the top, return to the original directory (or
                * the root of the tree), and load the paths for the next root.
                */
               if (p->fts_level == FTS_ROOTLEVEL) {
                       fts_load(sp, p);
                       return (sp->fts_cur = p);
               }

               /*
                * User may have called fts_set on the node.  If skipped,
                * ignore.  If followed, get a file descriptor so we can
                * get back if necessary.
                */
               if (p->fts_instr == FTS_SKIP)
                       goto next;

name:           t = sp->fts_path + NAPPEND(p->fts_parent);
               *t++ = '/';
               memmove(t, p->fts_name, p->fts_namelen + 1);
               return (sp->fts_cur = p);
       }

       /* Move up to the parent node. */
       p = tmp->fts_parent;
       free(tmp);

       if (p->fts_level == FTS_ROOTPARENTLEVEL) {
               /*
                * Done; free everything up and set errno to 0 so the user
                * can distinguish between error and EOF.
                */
               free(p);
               errno = 0;
               return (sp->fts_cur = NULL);
       }

       /* NUL terminate the pathname. */
       sp->fts_path[p->fts_pathlen] = '\0';

       p->fts_info = p->fts_errno ? FTS_ERR : FTS_DP;
       return (sp->fts_cur = p);
}

/*
* Fts_set takes the stream as an argument although it's not used in this
* implementation; it would be necessary if anyone wanted to add global
* semantics to fts using fts_set.  An error return is allowed for similar
* reasons.
*/
int
fts_set(FTS *sp, FTSENT *p, int instr)
{
       if (instr && instr != FTS_NOINSTR && instr != FTS_SKIP) {
               errno = EINVAL;
               return (1);
       }
       p->fts_instr = instr;
       return (0);
}

/*
* This is the tricky part -- do not casually change *anything* in here.  The
* idea is to build the linked list of entries that are used by fts_children
* and fts_read.  There are lots of special cases.
*
* The real slowdown in walking the tree is the stat calls.  If FTS_NOSTAT is
* set and it's a physical walk (so that symbolic links can't be directories),
* we can do things quickly.  First, if it's a 4.4BSD file system, the type
* of the file is in the directory entry.  Otherwise, we assume that the number
* of subdirectories in a node is equal to the number of links to the parent.
* The former skips all stat calls.  The latter skips stat calls in any leaf
* directories and for any files after the subdirectories in the directory have
* been found, cutting the stat calls by about 2/3.
*/
static FTSENT *
fts_build(FTS *sp)
{
       struct dirent *dp;
       FTSENT *p, *head;
       FTSENT *cur, *tail;
       DIR *dirp;
       void *oldaddr;
       size_t dlen, len, maxlen;
       int nitems, level, doadjust;
       int saved_errno;
       char *cp;

       /* Set current node pointer. */
       cur = sp->fts_cur;

       /*
        * Open the directory for reading.  If this fails, we're done.
        * If being called from fts_read, set the fts_info field.
        */
       if ((dirp = opendir(cur->fts_accpath)) == NULL) {
               cur->fts_info = FTS_DNR;
               cur->fts_errno = errno;
               return (NULL);
       }

       /*
        * Figure out the max file name length that can be stored in the
        * current path -- the inner loop allocates more path as necessary.
        * We really wouldn't have to do the maxlen calculations here, we
        * could do them in fts_read before returning the path, but it's a
        * lot easier here since the length is part of the dirent structure.
        *
        * If not changing directories set a pointer so that can just append
        * each new name into the path.
        */
       len = NAPPEND(cur);
       cp = sp->fts_path + len;
       *cp++ = '/';
       len++;
       maxlen = sp->fts_pathlen - len;

       /*
        * fts_level is signed so we must prevent it from wrapping
        * around to FTS_ROOTLEVEL and FTS_ROOTPARENTLEVEL.
        */
       level = cur->fts_level;
       if (level < FTS_MAXLEVEL)
           level++;

       /* Read the directory, attaching each entry to the `link' pointer. */
       doadjust = 0;
       for (head = tail = NULL, nitems = 0; dirp && (dp = readdir(dirp));) {
               if (ISDOT(dp->d_name))
                       continue;

#if HAVE_DIRENT_NAMLEN
               dlen = dp->d_namlen;
#else
               dlen = strlen(dp->d_name);
#endif

               if (!(p = fts_alloc(sp, dp->d_name, dlen)))
                       goto mem1;
               if (dlen >= maxlen) {   /* include space for NUL */
                       oldaddr = sp->fts_path;
                       if (fts_palloc(sp, dlen + len + 1)) {
                               /*
                                * No more memory for path or structures.  Save
                                * errno, free up the current structure and the
                                * structures already allocated.
                                */
mem1:                           saved_errno = errno;
                               free(p);
                               fts_lfree(head);
                               (void)closedir(dirp);
                               cur->fts_info = FTS_ERR;
                               SET(FTS_STOP);
                               errno = saved_errno;
                               return (NULL);
                       }
                       /* Did realloc() change the pointer? */
                       if (oldaddr != sp->fts_path) {
                               doadjust = 1;
                               cp = sp->fts_path + len;
                       }
                       maxlen = sp->fts_pathlen - len;
               }

               p->fts_level = level;
               p->fts_parent = sp->fts_cur;
               p->fts_pathlen = len + dlen;
               if (p->fts_pathlen < len) {
                       /*
                        * If we wrap, free up the current structure and
                        * the structures already allocated, then error
                        * out with ENAMETOOLONG.
                        */
                       free(p);
                       fts_lfree(head);
                       (void)closedir(dirp);
                       cur->fts_info = FTS_ERR;
                       SET(FTS_STOP);
                       errno = ENAMETOOLONG;
                       return (NULL);
               }

               /* Build a file name for fts_stat to stat. */
               p->fts_accpath = p->fts_path;
               memmove(cp, p->fts_name, p->fts_namelen + 1);
               /* Stat it. */
               p->fts_info = fts_stat(sp, p);

               /* We walk in directory order so "ls -f" doesn't get upset. */
               p->fts_link = NULL;
               if (head == NULL)
                       head = tail = p;
               else {
                       tail->fts_link = p;
                       tail = p;
               }
               ++nitems;
       }
       if (dirp)
               (void)closedir(dirp);

       /*
        * If realloc() changed the address of the path, adjust the
        * addresses for the rest of the tree and the dir list.
        */
       if (doadjust)
               fts_padjust(sp, head);

       /*
        * If not changing directories, reset the path back to original
        * state.
        */
       if (len == sp->fts_pathlen || nitems == 0)
               --cp;
       *cp = '\0';

       /* If didn't find anything, return NULL. */
       if (!nitems) {
               cur->fts_info = FTS_DP;
               return (NULL);
       }

       /* Sort the entries. */
       if (sp->fts_compar && nitems > 1)
               head = fts_sort(sp, head, nitems);
       return (head);
}

static unsigned short
fts_stat(FTS *sp, FTSENT *p)
{
       FTSENT *t;
       dev_t dev;
       ino_t ino;
       struct stat *sbp;

       /* If user needs stat info, stat buffer already allocated. */
       sbp = p->fts_statp;

       if (lstat(p->fts_accpath, sbp)) {
               p->fts_errno = errno;
               memset(sbp, 0, sizeof(struct stat));
               return (FTS_NS);
       }

       if (S_ISDIR(sbp->st_mode)) {
               /*
                * Set the device/inode.  Used to find cycles and check for
                * crossing mount points.  Also remember the link count, used
                * in fts_build to limit the number of stat calls.  It is
                * understood that these fields are only referenced if fts_info
                * is set to FTS_D.
                */
               dev = p->fts_dev = sbp->st_dev;
               ino = p->fts_ino = sbp->st_ino;
               p->fts_nlink = sbp->st_nlink;

               if (ISDOT(p->fts_name))
                       return (FTS_DOT);

               /*
                * Cycle detection is done by brute force when the directory
                * is first encountered.  If the tree gets deep enough or the
                * number of symbolic links to directories is high enough,
                * something faster might be worthwhile.
                */
               for (t = p->fts_parent;
                   t->fts_level >= FTS_ROOTLEVEL; t = t->fts_parent)
                       if (ino == t->fts_ino && dev == t->fts_dev) {
                               p->fts_cycle = t;
                               return (FTS_DC);
                       }
               return (FTS_D);
       }
       if (S_ISLNK(sbp->st_mode))
               return (FTS_SL);
       if (S_ISREG(sbp->st_mode))
               return (FTS_F);
       return (FTS_DEFAULT);
}

static FTSENT *
fts_sort(FTS *sp, FTSENT *head, int nitems)
{
       FTSENT **ap, *p;

       /*
        * Construct an array of pointers to the structures and call qsort(3).
        * Reassemble the array in the order returned by qsort.  If unable to
        * sort for memory reasons, return the directory entries in their
        * current order.  Allocate enough space for the current needs plus
        * 40 so don't realloc one entry at a time.
        */
       if (nitems > sp->fts_nitems) {
               struct _ftsent **a;

               if ((a = reallocarray(sp->fts_array,
                   nitems + 40, sizeof(FTSENT *))) == NULL) {
                       free(sp->fts_array);
                       sp->fts_array = NULL;
                       sp->fts_nitems = 0;
                       return (head);
               }
               sp->fts_nitems = nitems + 40;
               sp->fts_array = a;
       }
       for (ap = sp->fts_array, p = head; p; p = p->fts_link)
               *ap++ = p;
       qsort(sp->fts_array, nitems, sizeof(FTSENT *),
           (qsort_compar_proto)sp->fts_compar);
       for (head = *(ap = sp->fts_array); --nitems; ++ap)
               ap[0]->fts_link = ap[1];
       ap[0]->fts_link = NULL;
       return (head);
}

static FTSENT *
fts_alloc(FTS *sp, const char *name, size_t namelen)
{
       FTSENT *p;
       size_t len;

       len = sizeof(FTSENT) + namelen;
       if ((p = calloc(1, len)) == NULL)
               return (NULL);

       p->fts_path = sp->fts_path;
       p->fts_namelen = namelen;
       p->fts_instr = FTS_NOINSTR;
       p->fts_statp = malloc(sizeof(struct stat));
       if (p->fts_statp == NULL) {
               free(p);
               return (NULL);
       }
       memcpy(p->fts_name, name, namelen);

       return (p);
}

static void
fts_lfree(FTSENT *head)
{
       FTSENT *p;

       /* Free a linked list of structures. */
       while ((p = head)) {
               head = head->fts_link;
               free(p);
       }
}

/*
* Allow essentially unlimited paths; find, rm, ls should all work on any tree.
* Most systems will allow creation of paths much longer than PATH_MAX, even
* though the kernel won't resolve them.  Add the size (not just what's needed)
* plus 256 bytes so don't realloc the path 2 bytes at a time.
*/
static int
fts_palloc(FTS *sp, size_t more)
{
       char *p;

       /*
        * Check for possible wraparound.
        */
       more += 256;
       if (sp->fts_pathlen + more < sp->fts_pathlen) {
               free(sp->fts_path);
               sp->fts_path = NULL;
               errno = ENAMETOOLONG;
               return (1);
       }
       p = recallocarray(sp->fts_path, sp->fts_pathlen,
           sp->fts_pathlen + more, 1);
       if (p == NULL) {
               free(sp->fts_path);
               sp->fts_path = NULL;
               return (1);
       }
       sp->fts_pathlen += more;
       sp->fts_path = p;
       return (0);
}

/*
* When the path is realloc'd, have to fix all of the pointers in structures
* already returned.
*/
static void
fts_padjust(FTS *sp, FTSENT *head)
{
       FTSENT *p;
       char *addr = sp->fts_path;

#define ADJUST(p) {                                                     \
       if ((p)->fts_accpath != (p)->fts_name) {                        \
               (p)->fts_accpath =                                      \
                   (char *)addr + ((p)->fts_accpath - (p)->fts_path);  \
       }                                                               \
       (p)->fts_path = addr;                                           \
}
       /* Adjust the current set of children. */
       for (p = sp->fts_child; p; p = p->fts_link)
               ADJUST(p);

       /* Adjust the rest of the tree, including the current level. */
       for (p = head; p->fts_level >= FTS_ROOTLEVEL;) {
               ADJUST(p);
               p = p->fts_link ? p->fts_link : p->fts_parent;
       }
}

static size_t
fts_maxarglen(char * const *argv)
{
       size_t len, max;

       for (max = 0; *argv; ++argv)
               if ((len = strlen(*argv)) > max)
                       max = len;
       return (max + 1);
}