Logo Search packages:      
Sourcecode: zmailer version File versions  Download package

expand.c

/*
 *    Copyright 1989 by Rayan S. Zachariassen, all rights reserved.
 *    This will be free software, but only when it is finished.
 */

/*
 * Filename expansion (globbing) routines.  The expand() function is called
 * just before pushing an argv onto a command descriptor in the interpreter.
 * As a side-effect, multiple buffers are mashed into one, and word separation
 * using IFS characters is also done here.
 */

#include "hostenv.h"
#include <stdio.h>
#include <sys/file.h>
#include <sys/stat.h>

#ifdef HAVE_DIRENT_H
# include <dirent.h>
#else /* not HAVE_DIRENT_H */
# define dirent direct
# ifdef HAVE_SYS_NDIR_H
#  include <sys/ndir.h>
# endif /* HAVE_SYS_NDIR_H */
# ifdef HAVE_SYS_DIR_H
#  include <sys/dir.h>
# endif /* HAVE_SYS_DIR_H */
# ifdef HAVE_NDIR_H
#  include <ndir.h>
# endif /* HAVE_NDIR_H */
#endif /* HAVE_DIRENT_H */

#include "sh.h"
#include "flags.h"
#include "zmalloc.h"
#include "listutils.h"
#include "io.h"               /* redefines stdio routines */
#include "shconfig.h"

#include "libsh.h"

/*
 * For speed we look up magic characters (*, ?, and [) to check whether
 * a word requires filename globbing.  The globchars array contains such flags.
 */

char globchars[CHARSETSIZE];

/*
 * Initialize the array, called from main(), once.
 */

void
glob_init()
{
      int i;
      for (i = 0; i < CHARSETSIZE; ++i)
        globchars[i] = 0;

      globchars['*'] = globchars['?'] = globchars['['] = 1;
      /* see also sJumpIfMatch in interpreter for globchars['|'] */
}

/*
 * Because the result of filename expansion must be presented in alphabetical
 * order, we have to keep the pathnames somewhere temporarily to do a qsort().
 * Since we don't know how many there will be, we maintain a linked list of
 * (struct sawpath), then turn that into an array that we can sort easily.
 */

struct sawpath {
      struct sawpath *next;
      char  array[1];   /* is really array[strlen(path)+1] */
};


STATIC int glut __((char *cwd, char *pwd, int *pb, int recur, struct sawpath **swp));

/*
 * Normally we don't want to descend into symlink'ed directories, but in
 * case you do, this is where you change it.  It is INADVISABLE to use
 * stat() since in that case a super-glob can lead to infinite recursion.
 */

#ifdef      HAVE_LSTAT
extern int lstat();
static int (*statfcn)() = lstat;    /* stat if following symlinks, o/w lstat */
#else /* !HAVE_LSTAT */
extern int stat();
static int (*statfcn)() = stat;
#endif      /* HAVE_LSTAT */

/*
 * Qsort() comparison function to sort pathnames.
 */

int
pathcmp(ap, bp)
     const void *ap, *bp;
{
      register const void **a = (const void **)ap;
      register const void **b = (const void **)bp;

      /* sort to reverse order, easier to collate
         into an object chain */

      return -strcmp(*a, *b);
}

/* note that | is going to be used instead of / in some pathname examples */

/*
 * Super-glob.  This is exactly like glob except that the sequence: |**|
 * in a pathname will match any number of levels of directories.  Its an
 * old idea of mine so here's a wonderful opportunity to express myself within
 * the confines of sh!  The major thing to note is that quoted characters
 * do not qualify for globbing.  How does one know if a character is quoted?
 * Each character is stored in an int, the u_char value is obtained using
 * the BYTE() macro, and the quotedness is determined by the QUOTEBYTE bit
 * in the int.
 */

#define BYTE(X)         ((X) & 0xFF)
#define     QUOTEBYTE   (0x8000)

STATIC conscell * sglob __((int *));

STATIC conscell *
sglob(ibuf)
      int *ibuf;  /* unglobbed pathname w/ each byte stored as int */
{
      register int i, n;
      conscell *pp, *cc = NULL;
      struct sawpath *spp;
      struct sawpath head;
      char  *pwd,       /* points to end of current directory in cwd */
            **base,           /* array of expanded filenames, for sorting */
            cwd[4096];  /* all pathnames are constructed in cwd */
      GCVARS1;

      if (BYTE(ibuf[0]) == '/') {
            cwd[0] = '/';
            pwd = cwd+1;
      } else if (BYTE(ibuf[0]) == '.' && BYTE(ibuf[1]) == '/') {
            cwd[0] = '.';
            cwd[1] = '/';
            pwd = cwd+2;
            ibuf += 2;
      } else
            pwd = cwd;

      *pwd = '\0';
      head.next = NULL;
      spp = &head;
      if (BYTE(ibuf[0]) != 0)
        n = glut(cwd, pwd, ibuf, 0, &spp);      /* do expansion */
      else
        goto leave;
      
      if (n <= 0)
        goto leave;

#ifdef USE_ALLOCA
      base = (char **)alloca((sizeof (char *))*n);
#else
      base = (char **)malloc((sizeof (char *))*n);
#endif
      i = 0;
      for (spp = head.next; spp != NULL ; spp = spp->next)
            base[i++] = spp->array;
      qsort(base, n, sizeof base[0], pathcmp);
      /* construct a sorted linked list */
      cc = NULL;
      GCPRO1(cc);
      for (i = 0; i < n; ++i) {
            int slen = strlen(base[i]);
            pp = newstring(dupnstr(base[i],slen),slen);
            cdr(pp) = cc;
            cc = pp;
            /* printf("saw %s\n", base[i]); */
      }
      UNGCPRO1;
#ifndef USE_ALLOCA
      free(base);
#endif
 leave:
      spp = head.next;
      while (spp != NULL) {
        struct sawpath *spp2 = spp->next;
        free(spp);
        spp = spp2;
      }
      return cc;
}

/*
 * Utility function to append a pathname to a linked list for later sorting.
 */

STATIC struct sawpath * stash __((const void *, int, struct sawpath *));
STATIC struct sawpath *
stash(s, len, ps)
      const void *s;          /* the pathname we want to stash away */
      int len;          /* its length */
      struct sawpath *ps;     /* the previous list element */
{
      register struct sawpath *spp;
      
      spp = (struct sawpath *)malloc(sizeof (struct sawpath) + len);
      ps->next = spp;
      spp->next = NULL;
      memcpy(spp->array, s, len);
      spp->array[len] = 0;
      return spp;
}

STATIC int kleene[] = { '*', '/', 0 };    /* foo|**|  becomes foo|**|*| */

/*
 * This routine is the recursive workhorse of the filename globbing.
 */

static int
glut(cwd, pwd, bp, recur, swp)
      char  *cwd,       /* always the same buffer */
            *pwd;       /* pointer to end of directories/ inside cwd */
      int   *bp,        /* next character in the name to glob */
            recur;            /* flag: superglob mode, deep dir. descend */
      struct sawpath **swp;
{
      int   *start,           /* beginning of simple file name to expand */
            *eoname,    /* end of same */
            *ip,
            havepattern,      /* the simple file name contains glob chars */
            flag,       /* remember to put the trailing / on cwd back */
            count,            /* number of expansions */
            namlen,           /* filename length */
            i;
      struct stat stbuf;
      struct dirent *dp;
      DIR *dirp = NULL;
      
      if (interrupted)
            return 0;
      if (pwd > cwd+1) {
            *--pwd = '\0';
            flag = 1;
      } else
            flag = 0;
      /* printf("%s:\n", cwd); */
      /*
       * must do stat since assuming opendir will
       * fail on files might not be portable.
       */
      if ((*cwd == '\0' && statfcn(".", &stbuf) < 0) ||
          (*cwd != '\0' && statfcn(cwd, &stbuf) < 0) ||
          (stbuf.st_mode & S_IFMT) != S_IFDIR)
            return -1;
again:
      while (*bp != '\0' && BYTE(*bp) == '/')
            ++bp;
      if (*bp == '\0') {
            if (recur)  /* we're at the end of the road of a |**| */
                  bp = kleene;
            else {
                  *swp = stash(cwd, (pwd - cwd), *swp);
                  return 1;
            }
      }
      if (*bp == '*' && *(bp+1) == '*' && BYTE(*(bp+2)) == '/') {
            recur = 1;  /* superglob mode */
            bp += 2;
            if (*(bp+1))
                  goto again;
            else
                  ++bp; /* start and eoname will point at 0 */
      }
      start = bp;
      while (*bp != '\0' && BYTE(*bp) != '/')
            ++bp;
      eoname = bp;

      /*
       * Now we have a local name between start and eoname, relative to
       * the directory we have open. search through the directory for
       * that name, then do the whole thing again, recursively.
       */
      
      havepattern = count = 0;
      /* optimization... is it worth globbing in inner loop far below? */
      for (ip = start; ip < eoname; ++ip)
            if (*ip == '*' || *ip == '?' || *ip == '[') {
                  havepattern = 1;
                  break;
            }
      if ((*cwd == '\0' && (dirp = opendir(".")) == NULL) ||
          (*cwd != '\0' && (dirp = opendir((char *)cwd)) == NULL)) {
            perror((char *)cwd);
            return 0;
      }
      if (flag) {
            *pwd++ = '/';
            *pwd = '\0';
      }

      /* major loop */

      for (dp = readdir(dirp); dp != NULL; dp = readdir(dirp)) {

            /* a * won't match .files */
            if (*start == '*' && start == eoname - 1
                && dp->d_name[0] == '.')
                  continue;

            /* if we can't match the simple name, forget it */
            if (!recur &&
                glob_match(start, eoname, dp->d_name) == 0)
                  continue;
            strcpy((char *)pwd, dp->d_name);
            namlen = strlen(dp->d_name);
            if (recur && !(dp->d_name[0] == '.' && (dp->d_name[1] == '\0'
                  || (dp->d_name[1] == '.' && dp->d_name[2] == '\0')))) {
                  /* in superglob mode... glut returns -1 if is a file */
                  i = glut(cwd, pwd+namlen+1, start, recur, swp);
                  if (i >= 0) {
                        count += i;
                        strcpy((char *)pwd, dp->d_name);
                        if (*start != 0 && start != kleene)
                              continue;
                        ++count;
                        *swp = stash(cwd, (int)((pwd+namlen)-cwd), *swp);
                  }
            }
            if (*eoname == 0) {     /* end of the globbable name */
                  /* we aren't interested in descending directories */
                  if (!recur ||
                      glob_match(start, eoname, dp->d_name))
                        ++count,
                        *swp = stash(cwd, (int)((pwd+namlen)-cwd), *swp);
            } else if (!recur) {
                  /* we are only interested in directories */
                  i = glut(cwd, pwd+namlen+1, eoname, recur, swp);
                  count += (i > 0 ? i : 0);
            }
            if (recur || havepattern)
                  continue;
            else
                  break;
      }
#ifdef      BUGGY_CLOSEDIR
      /*
       * Major serious bug time here;  some closedir()'s
       * free dirp before referring to dirp->dd_fd. GRRR.
       * XX: remove this when bug is eradicated from System V's.
       */
      close(dirp->dd_fd);
#endif
      closedir(dirp);
      return count;
}

/*
 * This is a heavily recursive globbing function, it uses the string (which
 * is actually an int array as mentioned above) as a DFA that it interprets.
 * This routine should be kept in sync with the case-statement globbing
 * in the interpreter.  Unfortunately the requirements are different enough
 * that sharing code would be hard.
 */

int
glob_match(pattern, eopattern, s)
      register int      *pattern, *eopattern;
      register const char     *s;
{
      register int i, i2, sense;

      while (eopattern == NULL || pattern < eopattern) {
            switch (*pattern) {
            case '*':
                  while (*pattern == '*')
                        pattern++;
                  do {
                        if (glob_match(pattern, eopattern, s))
                              return 1;
                  } while (*s++ != '\0');
                  return 0;
            case '[':
                  if (*s == '\0')
                        return 0;
                  sense = (*(pattern+1) != '!');
                  if (!sense)
                    ++pattern;
                  while ((*++pattern != ']') && (*pattern != *s)) {
                    if (pattern == eopattern)
                      return !sense;
                    if (*(pattern+1) == '-'
                        && (i2 = BYTE(*(pattern+2))) != ']'
                        && i2 != 0) {
                      i2 = (i2 < 128) ? i2 : 127;
                      for (i = BYTE(*pattern)+1; i <= i2; i++)
                        if (i == *s) {
                        if (sense)
                          goto ok;
                        else
                          return 0;
                        }
                      pattern += 2;
                    }
                  }
                  if ((*pattern == ']') == sense)
                        return 0;
ok:
                  while (*pattern++ != ']')
                        if (pattern == eopattern)
                              return 0;
                  s++;
                  break;
            case '?':
                  if (*s == '\0')
                        return 0;
                  s++;
                  pattern++;
                  break;
            case '\0':
                  return (*s == '\0');
            default:
                  if (BYTE(*pattern++) != *s++)
                        return 0;
            }
      }
      return (*s == '\0');
}

/* 
 * Mash the linked list of buffers passed into a single buffer on return.
 */

int
squish(d, bufp, ibufp, doglob)
      conscell *d; /* input is protected */
      char **bufp;
      int **ibufp, doglob;
{
      register char *cp, *bp;
      register int *ip;
      register int sawglob, mask, len;
      register conscell *l;
      char *buf;
      int *ibuf;

      if ((LIST(d) || ISQUOTED(d)) && cdr(d) == NULL)
        return -1;
      /* how much space will unexpanded concatenation of buffers take? */
      for (l = d, len = 0, sawglob = 0; l != NULL; l = cdr(l)) {
        int slen = l->slen;
        if (l->string == NULL || slen == 0)
          continue;
        len += slen;
        if (doglob) {
          /* We do glob processing */
          cp = l->string;
          if (!sawglob) {
            while (slen > 0) {
            if (globchars[BYTE(*cp)] != 0 &&
                !(cp == d->string &&
                  *cp == '[' && *(d->string+1) == '\0')) {
              ++sawglob;
              break;
            }
            ++cp; --slen;
            }
          }
        }
      }

      /* f option disables filename generation */
      sawglob = (sawglob != 0); /* zero/one value space */
      if (doglob > 0) /* doglob == -1 for 'case' matching */
        if (isset('f')) sawglob = 0;

      if (!sawglob && cdr(d) == NULL) {
        return -1;
      }

      /* allocate something large enough to hold integer per char */
      *bufp = buf = (char *)malloc((len+1)*(sawglob ? sizeof(int) : 1));
      *ibufp = ibuf = (int *)buf;

      for (l = d, bp = buf, ip = ibuf; l != NULL; l = cdr(l)) {
        if (l->string) {
          if (sawglob) {
            int slen = l->slen;
            /*
             * Create int array with quoted characters
             * marked by the QUOTEBYTE bit.
             */
            mask = ISQUOTED(l) ? QUOTEBYTE : 0;
            for (cp = l->string; slen > 0; --slen,++cp) {
#if 0
            if (*cp == '\\' && *(cp+1) != '\0')
              *ip++ = BYTE(*++cp) | QUOTEBYTE;
            else
#endif
              *ip++ = BYTE(*cp)   | mask;
            }
          } else if (ISQUOTED(l)) {
            if (l->slen > 0) {
            memcpy(bp, l->string, l->slen);
            bp += l->slen;
            }
          } else {
            int slen = l->slen;
            for (cp = l->string; slen > 0; ++cp, --slen) {
            if (*cp == '\\' && slen > 1)
              ++cp;
            *bp++ = *cp;
            }
          }
        }
      }
      if (sawglob)
        *ip = 0;
      else
        *bp = '\0';
      return sawglob;
}

/*
 * Given a buffer list, check all non-quoted non-list portions for
 * file globbing characters and if relevant perform filename expansion.
 * The caller relies on the return value never being NULL.
 */

STATIC conscell * csglob __((conscell *, int doglob));
STATIC conscell *
csglob(d, doglob)
      conscell *d; int doglob;
{
      register char *bp;
      register int *ip;
      conscell *tmp;
      char *buf = NULL;
      int *ibuf;
      int s, slen;

      s = squish(d, &buf, &ibuf, doglob);
      switch (s) {
      case -1:
            /* if (buf) free(buf); -- no allocations; ever */
            return d;
      case 1:
            tmp = sglob(ibuf);
            if (tmp != NULL) {
              free(buf);
              return tmp;
            }
            for (bp = buf, ip = ibuf; *ip != '\0'; ++ip)
                  *bp++ = BYTE(*ip);
            *bp = '\0';
            /* FALLTHROUGH */
            /* (re)filled the buffer, can throw away
               the conscell chain in 'tmp'. */
      case 0:
            slen = strlen(buf);
            tmp = newstring(dupnstr(buf,slen),slen);
            free(buf);
            break;
      default:
            abort();
      }

      return tmp;
}

extern conscell *
sh_glob(avl,il)
     conscell *avl, *il;
{
      il = cdar(avl);
      if (il == NULL || !STRING(il)) {
            fprintf(stderr, "Usage: %s glob-patter-string\n",
                        car(avl)->string);
            return NULL;
      }
      /* Always do the glob, never mind if 'set -f' is in effect! */
      il = csglob(il, -1);

      for (avl = il; avl != NULL; avl = cdr(avl))
        if (STRING(avl))
          avl->flags |= (QUOTEDSTRING|ELEMENT);

      return il;
}


/*
 * This function is called with the unexpanded buffer contents (usually
 * a linked list of strings) just prior to it being added as a command
 * argument.  Expansion involves scanning unquoted strings for whitespace
 * (as defined by IFS) and breaking those apart into multiple argv's,
 * as well as filename globbing of the resulting unquoted strings.
 * The return value is a list of argv's.
 *
 * The input array is safe to be munched in place!  No need to copy it!
 */


conscell *
expand(d, variant)
      conscell *d; /* input protected */
      int variant;
{
      conscell *tmp, *head, *next, *orig;
      conscell *globbed = NULL, **pav;
      register char *cp, *cp0;
      int slen, slen0;
      GCVARS6;


      /* Ok, following is MAGIC - sVariablePush when the
         data is $(elements ..) produced list */

      if (variant == 2 && ISELEMENT(d)) return d;



      tmp = head = next = orig = globbed = NULL;
      GCPRO6(tmp, head, next, orig, globbed, d);

      /* grindef("EXP = ", d); */

      /* *NOT* copying at first with s_copy_tree -- 13 % of total system
         runtime is in THAT call from THIS function! */

#if 0
      d = s_copy_chain(d); /* Copy the LINEAR CHAIN, not the whole tree! */
#endif

      orig = d;
      pav = &globbed;
      for (head = d; d != NULL; d = next) {
            if (head == NULL)
                  head = d;
            next = cdr(d);
            if (LIST(d) || ISQUOTED(d)) {
                  continue;
            } else if (ISELEMENT(d)) {
                  if (head != d) {
                        cdr(head) = NULL;
                        head = *pav = csglob(head,0);
                        while (cdr(head) != NULL) head = cdr(head);
                        pav = &cdr(head);
                  }
                  head = NULL;
                  d->flags &= ~ELEMENT;
                  *pav = d;
                  pav = &cdr(d);
                  continue;
            }
            /* null strings should be retained */
            /* fprintf(stderr,"checking '%s'\n", d->string); */
            cp0 = cp = d->string;
            slen0 = slen = d->slen;
            if (head == d) {
                  /* skip leading whitespace */
                  while (slen > 0 && WHITESPACE(*cp)) ++cp, --slen;
                  cp0 = cp;
            }
            while (slen > 0) {
              if (WHITESPACE(*cp)) {
                /* Two possible cases: cp0 == d->string, which means
                   the stuff is from the beginning of the string, and
                   playing with d->slen does work.
                   The second case is when the input string contains
                   possibly several white space separated substrings:
                   "aa bb cc", and this is second white-space part, or
                   later.. */
                if (cp0 == d->string) {
                  /* Variant first.. */
                  int s0len = d->slen;
                  d->slen = (cp - d->string);
                  ++cp;
                  d->slen = (slen0 - slen);
                  --slen;
                  cdr(d) = NULL;
                  /* wrap the stuff at head into its own argv */
                  /* printf("wrapped '%s'\n", d->string); */
                  tmp = *pav = csglob(head,0);
                  while (cdr(tmp)) tmp = cdr(tmp);
                  if (tmp == d) {
                  /* Crap ! That stuff didn't expand */
                  ;
                  } else
                  d->slen = s0len;
                  pav = & cdr(tmp);
                } else {
                  /* Variant second... */
                  /* This means also that the previous text bit
                   IS 'head' -- or can be set as it! */
                  head = newstring(dupnstr(cp0,cp-cp0),cp-cp0);
                  cdr(head) = NULL;
                  tmp = *pav = csglob(head,0);
                  while (cdr(tmp)) tmp = cdr(tmp);
                  pav = & cdr(tmp);
                }

                /* now find the continuation */
                while (slen > 0 && WHITESPACE(*cp))
                  ++cp, --slen;

                if (slen == 0) {
                  cp0 = d->string; /* mark to ignore this string */
                  head = NULL;
                  break;
                } else {
                  /* We have more non-white-space stuff
                   following */
                  cp0 = cp;
                }
              }
              ++cp;
              --slen;
            }
            if (d->string != cp0) {
              /* went to the end, and did start in the middle of
                 the thing */
              int sslen = slen0 - (cp0 - d->string);
              head = newstring(dupnstr(cp0, sslen), sslen);
            }
      }

      if (head != NULL) {
            /* printf("trailing '%s'\n", head->string); */
            /* glob is guaranteed to not return NULL */
            head = *pav = csglob(head,0);
            while (cdr(head) != NULL) head = cdr(head);
            pav = &cdr(head);
      }
      *pav = NULL;

      UNGCPRO6;

      /* fprintf(stderr, "EXPLEN = %d ",globbed->slen);
         grindef("EXPOUT = ", globbed); */

      return globbed;
}

Generated by  Doxygen 1.6.0   Back to index