Use wordsplit.[ch] (from grecs) instead of the obsolete argcv.[ch]

author: Sergey Poznyakoff <gray@gnu.org.ua> 2014-02-07 19:40:07 +0200
committer: Sergey Poznyakoff <gray@gnu.org.ua> 2014-02-07 19:40:07 +0200
commit: 4be79061e8f68f6e3174a05452d96f31e8062464 (patch)
tree: c93924dc62e3f71f76cf29f98cb1c8d164e228a8 /src
parent: c9b5abe560c2fe06368cb733df8bcbfdb33a8526 (diff)
download: cflow-4be79061e8f68f6e3174a05452d96f31e8062464.tar.gz
cflow-4be79061e8f68f6e3174a05452d96f31e8062464.tar.bz2
6 files changed, 1813 insertions, 480 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index bad0476..e7f0b85 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -18,8 +18,6 @@
 
 bin_PROGRAMS = cflow
 cflow_SOURCES = \
- argcv.c\
- argcv.h\
  c.l\
  cflow.h\
  depmap.c\
@@ -31,7 +29,9 @@ cflow_SOURCES = \
  parser.h\
  posix.c\
  rc.c\
- symbol.c
+ symbol.c\
+ wordsplit.c\
+ wordsplit.h
 
 localedir = $(datadir)/locale
 
diff --git a/src/argcv.c b/src/argcv.c
deleted file mode 100644
index 611cb64..0000000
--- a/src/argcv.c
+++ /dev/null
@@ -1,410 +0,0 @@
-/* argcv.c - simple functions for parsing input based on whitespace
-   Copyright (C) 1999, 2000, 2001, 2005, 2007 Free Software Foundation, Inc.
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifdef HAVE_CONFIG_H
-# include <config.h>
-#endif
-
-#include <ctype.h>
-#include <errno.h>
-#include <argcv.h>
-
-/*
- * takes a string and splits it into several strings, breaking at ' '
- * command is the string to split
- * the number of strings is placed into argc
- * the split strings are put into argv
- * returns 0 on success, nonzero on failure
- */
-
-#define isws(c) ((c)==' '||(c)=='\t'||(c)=='\n')
-#define isdelim(c,delim) (strchr(delim,(c))!=NULL)
-
-static int
-argcv_scan (int len, const char *command, const char *delim, const char* cmnt,
-	    int *start, int *end, int *save)
-{
-  int i = 0;
-  
-  for (;;)
-    {
-      i = *save;
-
-      if (i >= len)
-	return i + 1;
-
-      /* Skip initial whitespace */
-      while (i < len && isws (command[i]))
-	i++;
-      *start = i;
-
-      if (!isdelim (command[i], delim))
-	{
-	  while (i < len)
-	    {
-	      if (command[i] == '\\')
-		{
-		  if (++i == len)
-		    break;
-		  i++;
-		  continue;
-		}
-	      
-	      if (command[i] == '\'' || command[i] == '"')
-		{
-		  int j;
-		  for (j = i+1; j < len && command[j] != command[i]; j++)
-		    if (command[j] == '\\')
-		      j++;
-		  if (j < len)
-		    i = j+1;
-		  else
-		    i++;
-		}
-	      else if (isws (command[i]) || isdelim (command[i], delim))
-		break;
-	      else
-		i++; /* skip the escaped character */
-	    }
-	  i--;
-	}
-
-      *end = i;
-      *save = i + 1;
-
-      /* If we have a token, and it starts with a comment character, skip
-         to the newline and restart the token search. */
-      if (*save <= len)
-	{
-	  if (cmnt && strchr (cmnt, command[*start]) != NULL)
-	    {
-	      i = *save;
-	      while (i < len && command[i] != '\n')
-		i++;
-
-	      *save = i;
-	      continue;
-	    }
-	}
-      break;
-    }
-  return *save;
-}
-
-static char quote_transtab[] = "\\\\a\ab\bf\fn\nr\rt\t";
-
-int
-argcv_unquote_char (int c)
-{
-  char *p;
-
-  for (p = quote_transtab; *p; p += 2)
-    {
-      if (*p == c)
-	return p[1];
-    }
-  return c;
-}
-
-int
-argcv_quote_char (int c)
-{
-  char *p;
-  
-  for (p = quote_transtab + sizeof(quote_transtab) - 2;
-       p > quote_transtab; p -= 2)
-    {
-      if (*p == c)
-	return p[-1];
-    }
-  return -1;
-}
-  
-#define to_num(c) \
-  (isdigit(c) ? c - '0' : (isxdigit(c) ? toupper(c) - 'A' + 10 : 255 ))
-
-static int
-xtonum (int *pval, const char *src, int base, int cnt)
-{
-  int i, val;
-  
-  for (i = 0, val = 0; i < cnt; i++, src++)
-    {
-      int n = *(unsigned char*)src;
-      if (n > 127 || (n = to_num(n)) >= base)
-	break;
-      val = val*base + n;
-    }
-  *pval = val;
-  return i;
-}
-
-size_t
-argcv_quoted_length (const char *str, int *quote)
-{
-  size_t len = 0;
-
-  *quote = 0;
-  for (; *str; str++)
-    {
-      if (*str == ' ')
-	{
-	  len++;
-	  *quote = 1;
-	}
-      else if (*str == '"' || *str == '\'')
-	{
-	  len += 2;
-	  *quote = 1;
-	}
-      else if (*str != '\t' && *str != '\\' && isprint (*str))
-	len++;
-      else if (argcv_quote_char (*str) != -1)
-	len += 2;
-      else
-	len += 4;
-    }
-  return len;
-}
-
-void
-argcv_unquote_copy (char *dst, const char *src, size_t n)
-{
-  int i = 0;
-  int c;
-  int expect_delim = 0; 
-    
-  while (i < n)
-    {
-      switch (src[i])
-	{
-	case '\'':
-	case '"':
-	  if (!expect_delim)
-	    {
-	      const char *p;
-	      
-	      for (p = src+i+1; *p && *p != src[i]; p++)
-		if (*p == '\\')
-		  p++;
-	      if (*p)
-		expect_delim = src[i++];
-	      else
-		*dst++ = src[i++];
-	    }
-	  else if (expect_delim == src[i])
-	    ++i;
-	  else
-	    *dst++ = src[i++];
-	  break;
-	  
-	case '\\':
-	  ++i;
-	  if (src[i] == 'x' || src[i] == 'X')
-	    {
-	      if (n - i < 2)
-		{
-		  *dst++ = '\\';
-		  *dst++ = src[i++];
-		}
-	      else 
-		{
-		  int off = xtonum(&c, src + i + 1, 16, 2);
-		  if (off == 0)
-		    {
-		      *dst++ = '\\';
-		      *dst++ = src[i++];
-		    }
-		  else
-		    {
-		      *dst++ = c;
-		      i += off + 1;
-		    }
-		}
-	    }
-	  else if ((unsigned char)src[i] < 128 && isdigit(src[i]))
-	    {
-	      if (n - i < 1)
-		{
-		  *dst++ = '\\';
-		  *dst++ = src[i++];
-		}
-	      else
-		{
-		  int off = xtonum(&c, src+i, 8, 3);
-		  if (off == 0)
-		    {
-		      *dst++ = '\\';
-		      *dst++ = src[i++];
-		    }
-		  else
-		    {
-		      *dst++ = c;
-		      i += off;
-		    }
-		}
-	    }
-	  else
-	    *dst++ = argcv_unquote_char (src[i++]);
-	  break;
-	  
-	default:
-	  *dst++ = src[i++];
-	}
-    }
-  *dst = 0;
-}
-
-void
-argcv_quote_copy (char *dst, const char *src)
-{
-  for (; *src; src++)
-    {
-      if (*src == '"' || *src == '\'')
-	{
-	  *dst++ = '\\';
-	  *dst++ = *src;
-	}
-      else if (*src != '\t' && *src != '\\' && isprint(*src))
-	*dst++ = *src;      
-      else
-	{
-	  int c = argcv_quote_char (*src);
-	  *dst++ = '\\';
-	  if (c != -1)
-	    *dst++ = c;
-	  else
-	    {
-	      char tmp[4];
-	      snprintf (tmp, sizeof tmp, "%03o", *(unsigned char*)src);
-	      memcpy (dst, tmp, 3);
-	      dst += 3;
-	    }
-	}
-    }
-}
-
-int
-argcv_get (const char *command, const char *delim, const char *cmnt,
-	   int *argc, char ***argv)
-{
-  int len = strlen (command);
-  int i = 0;
-  int start, end, save;
-
-  *argv = NULL;
-
-  /* Count number of arguments */
-  *argc = 0;
-  save = 0;
-
-  while (argcv_scan (len, command, delim, cmnt, &start, &end, &save) <= len)
-      (*argc)++;
-
-  *argv = calloc ((*argc + 1), sizeof (char *));
-  if (*argv == NULL)
-    return ENOMEM;
-  
-  i = 0;
-  save = 0;
-  for (i = 0; i < *argc; i++)
-    {
-      int n;
-      argcv_scan (len, command, delim, cmnt, &start, &end, &save);
-
-      if ((command[start] == '"' || command[end] == '\'')
-	  && command[end] == command[start])
-	{
-	  start++;
-	  end--;
-	}
-      n = end - start + 1;
-      (*argv)[i] = calloc (n+1,  sizeof (char));
-      if ((*argv)[i] == NULL)
-	return ENOMEM;
-      argcv_unquote_copy ((*argv)[i], &command[start], n);
-      (*argv)[i][n] = 0;
-    }
-  (*argv)[i] = NULL;
-  return 0;
-}
-
-/*
- * frees all elements of an argv array
- * argc is the number of elements
- * argv is the array
- */
-int
-argcv_free (int argc, char **argv)
-{
-  while (--argc >= 0)
-    if (argv[argc])
-      free (argv[argc]);
-  free (argv);
-  return 0;
-}
-
-/* Take a argv an make string separated by ' '.  */
-
-int
-argcv_string (int argc, char **argv, char **pstring)
-{
-  size_t i, j, len;
-  char *buffer;
-
-  /* No need.  */
-  if (pstring == NULL)
-    return EINVAL;
-
-  buffer = malloc (1);
-  if (buffer == NULL)
-    return ENOMEM;
-  *buffer = '\0';
-
-  for (len = i = j = 0; i < argc; i++)
-    {
-      int quote;
-      int toklen;
-
-      toklen = argcv_quoted_length (argv[i], &quote);
-      
-      len += toklen + 2;
-      if (quote)
-	len += 2;
-      
-      buffer = realloc (buffer, len);
-      if (buffer == NULL)
-        return ENOMEM;
-
-      if (i != 0)
-	buffer[j++] = ' ';
-      if (quote)
-	buffer[j++] = '"';
-      argcv_quote_copy (buffer + j, argv[i]);
-      j += toklen;
-      if (quote)
-	buffer[j++] = '"';
-    }
-
-  for (; j > 0 && isspace (buffer[j-1]); j--)
-    ;
-  buffer[j] = 0;
-  if (pstring)
-    *pstring = buffer;
-  return 0;
-}
-
diff --git a/src/argcv.h b/src/argcv.h
deleted file mode 100644
index c082709..0000000
--- a/src/argcv.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/* GNU Mailutils -- a suite of utilities for electronic mail
-   Copyright (C) 1999, 2000, 2001, 2005, 2007 Free Software Foundation, Inc.
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
-
-#ifndef _ARGCV_H
-#define _ARGCV_H 1
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef __P
-# if defined PROTOTYPES || (defined __STDC__ && __STDC__)
-#  define __P(args) args
-# else
-#  define __P(args) ()
-# endif
-#endif /*__P */
-
-extern int argcv_get    __P ((const char *command, const char *delim,
-			      const char* cmnt,
-			      int *argc, char ***argv));
-extern int argcv_string __P ((int argc, char **argv, char **string));
-extern int argcv_free   __P ((int argc, char **argv));
-extern int argcv_unquote_char __P((int c));
-extern int argcv_quote_char   __P((int c));
-extern size_t argcv_quoted_length __P((const char *str, int *quote));
-extern void argcv_unquote_copy __P((char *dst, const char *src, size_t n));
-extern void argcv_quote_copy __P((char *dst, const char *src));
-  
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _ARGCV_H */
diff --git a/src/rc.c b/src/rc.c
index bb9d25a..7586975 100644
--- a/src/rc.c
+++ b/src/rc.c
@@ -18,7 +18,7 @@
 #include <parser.h>
 #include <sys/stat.h>
 #include <ctype.h>
-#include <argcv.h>
+#include <wordsplit.h>
 
 #ifndef LOCAL_RC
 # define LOCAL_RC ".cflowrc"
@@ -31,8 +31,9 @@ expand_argcv(int *argc_ptr, char ***argv_ptr, int argc, char **argv)
      
      *argv_ptr = xrealloc(*argv_ptr,
 			  (*argc_ptr + argc + 1) * sizeof **argv_ptr);
-     for (i = 0; i <= argc; i++)
-	  (*argv_ptr)[*argc_ptr + i] = argv[i];
+     for (i = 0; i < argc; i++)
+	  (*argv_ptr)[*argc_ptr + i] = xstrdup(argv[i]);
+     (*argv_ptr)[*argc_ptr + i] = NULL;
      *argc_ptr += argc;
 }
 
@@ -45,6 +46,9 @@ parse_rc(int *argc_ptr, char ***argv_ptr, char *name)
      FILE *rcfile;
      int size;
      char *buf, *p;
+     struct wordsplit ws;
+     int wsflags;
+     int line;
      
      if (stat(name, &st))
 	  return;
@@ -62,14 +66,19 @@ parse_rc(int *argc_ptr, char ***argv_ptr, char *name)
      buf[size] = 0;
      fclose(rcfile);
 
+     ws.ws_comment = "#";
+     wsflags = WRDSF_DEFFLAGS | WRDSF_COMMENT;
+     line = 0;
      for (p = strtok(buf, "\n"); p; p = strtok(NULL, "\n")) {
-	  int argc;
-	  char **argv;
-	  
-	  argcv_get(p, "", "#", &argc, &argv);
-	  expand_argcv(argc_ptr, argv_ptr, argc, argv);
-	  free(argv);
+	  ++line;
+	  if (wordsplit(p, &ws, wsflags))
+	       error(1, 0, "%s:%d: %s", name, line, wordsplit_strerror(&ws));
+	  wsflags |= WRDSF_REUSE;
+	  if (ws.ws_wordc)
+	       expand_argcv(argc_ptr, argv_ptr, ws.ws_wordc, ws.ws_wordv);
      }
+     if (wsflags & WRDSF_REUSE)
+	  wordsplit_free(&ws);
      free(buf);
 }
 
@@ -94,12 +103,15 @@ sourcerc(int *argc_ptr, char ***argv_ptr)
      
      env = getenv("CFLOW_OPTIONS");
      if (env) {
-	  int argc;
-	  char **argv;
-	  
-	  argcv_get(env, "", "#", &argc, &argv);
-	  expand_argcv(&xargc, &xargv, argc, argv);
-	  free(argv);
+	  struct wordsplit ws;
+
+	  ws.ws_comment = "#";
+	  if (wordsplit(env, &ws, WRDSF_DEFFLAGS | WRDSF_COMMENT))
+	       error(1, 0, "failed to parse CFLOW_OPTIONS: %s",
+		     wordsplit_strerror(&ws));
+	  if (ws.ws_wordc)
+	       expand_argcv(&xargc, &xargv, ws.ws_wordc, ws.ws_wordv);
+	  wordsplit_free(&ws);
      }
 
      env = getenv("CFLOWRC");
diff --git a/src/wordsplit.c b/src/wordsplit.c
new file mode 100644
index 0000000..9047369
--- /dev/null
+++ b/src/wordsplit.c
@@ -0,0 +1,1624 @@
+/* wordsplit - a word splitter
+   Copyright (C) 2009-2012 Sergey Poznyakoff
+
+   This program is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3 of the License, or (at your
+   option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License along
+   with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <errno.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+#if ENABLE_NLS
+# include <gettext.h>
+#else
+# define gettext(msgid) msgid
+#endif
+#define _(msgid) gettext (msgid)
+#define N_(msgid) msgid
+
+#include <wordsplit.h>
+
+#define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n')
+#define ISDELIM(ws,c) \
+  (strchr ((ws)->ws_delim, (c)) != NULL)
+#define ISPUNCT(c) (strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL)
+#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z')
+#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z')
+#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c))
+#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9')
+/* True only for the hexadecimal letters a-f and A-F; decimal digits are
+   NOT matched here and must be tested with ISDIGIT.  NUL is rejected
+   explicitly because strchr() also matches the terminating null
+   character of the string it searches. */
+#define ISXDIGIT(c) ((c) != 0 && strchr ("abcdefABCDEF", (c)) != NULL)
+#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c))
+#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127)
+
+#define ALLOC_INIT 128
+#define ALLOC_INCR 128
+
+static void
+_wsplt_alloc_die (struct wordsplit *wsp)
+{
+  wsp->ws_error (_("memory exhausted"));
+  abort ();
+}
+
+static void
+_wsplt_error (const char *fmt, ...)
+{
+  va_list ap;
+
+  va_start (ap, fmt);
+  vfprintf (stderr, fmt, ap);
+  va_end (ap);
+  fputc ('\n', stderr);
+}
+
+static void wordsplit_free_nodes (struct wordsplit *);
+
+static int
+_wsplt_nomem (struct wordsplit *wsp)
+{
+  errno = ENOMEM;
+  wsp->ws_errno = WRDSE_NOSPACE;
+  if (wsp->ws_flags & WRDSF_ENOMEMABRT)
+    wsp->ws_alloc_die (wsp);
+  if (wsp->ws_flags & WRDSF_SHOWERR)
+    wordsplit_perror (wsp);
+  if (!(wsp->ws_flags & WRDSF_REUSE))
+    wordsplit_free (wsp);
+  wordsplit_free_nodes (wsp);
+  return wsp->ws_errno;
+}
+
+static void
+wordsplit_init0 (struct wordsplit *wsp)
+{
+  if (wsp->ws_flags & WRDSF_REUSE)
+    {
+      if (!(wsp->ws_flags & WRDSF_APPEND))
+	wordsplit_free_words (wsp);
+    }
+  else
+    {
+      wsp->ws_wordv = NULL;
+      wsp->ws_wordc = 0;
+      wsp->ws_wordn = 0;
+    }
+
+  wsp->ws_errno = 0;
+  wsp->ws_head = wsp->ws_tail = NULL;
+}
+
+static int
+wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
+		int flags)
+{
+  wsp->ws_flags = flags;
+
+  if (!(wsp->ws_flags & WRDSF_ALLOC_DIE))
+    wsp->ws_alloc_die = _wsplt_alloc_die;
+  if (!(wsp->ws_flags & WRDSF_ERROR))
+    wsp->ws_error = _wsplt_error;
+
+  if (!(wsp->ws_flags & WRDSF_NOVAR)
+      && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR)))
+    {
+      errno = EINVAL;
+      wsp->ws_errno = WRDSE_USAGE;
+      if (wsp->ws_flags & WRDSF_SHOWERR)
+	wordsplit_perror (wsp);
+      return wsp->ws_errno;
+    }
+
+  if (!(wsp->ws_flags & WRDSF_NOCMD))
+    {
+      errno = EINVAL;
+      wsp->ws_errno = WRDSE_NOSUPP;
+      if (wsp->ws_flags & WRDSF_SHOWERR)
+	wordsplit_perror (wsp);
+      return wsp->ws_errno;
+    }
+
+  if (wsp->ws_flags & WRDSF_SHOWDBG)
+    {
+      if (!(wsp->ws_flags & WRDSF_DEBUG))
+	{
+	  if (wsp->ws_flags & WRDSF_ERROR)
+	    wsp->ws_debug = wsp->ws_error;
+	  else if (wsp->ws_flags & WRDSF_SHOWERR)
+	    wsp->ws_debug = _wsplt_error;
+	  else
+	    wsp->ws_flags &= ~WRDSF_SHOWDBG;
+	}
+    }
+
+  wsp->ws_input = input;
+  wsp->ws_len = len;
+
+  if (!(wsp->ws_flags & WRDSF_DOOFFS))
+    wsp->ws_offs = 0;
+
+  if (!(wsp->ws_flags & WRDSF_DELIM))
+    wsp->ws_delim = " \t\n";
+
+  if (!(wsp->ws_flags & WRDSF_COMMENT))
+    wsp->ws_comment = NULL;
+
+  if (!(wsp->ws_flags & WRDSF_CLOSURE))
+    wsp->ws_closure = NULL;
+
+  wsp->ws_endp = 0;
+
+  wordsplit_init0 (wsp);
+
+  return 0;
+}
+
+/* Make sure wsp->ws_wordv can hold COUNT more entries in addition to the
+   ws_offs leading slots (counted only when WRDSF_DOOFFS is set) and the
+   ws_wordc words already stored.  On success ws_wordv/ws_wordn are
+   updated and 0 is returned; if the allocation fails, the result of
+   _wsplt_nomem() is returned. */
+static int
+alloc_space (struct wordsplit *wsp, size_t count)
+{
+  size_t offs = (wsp->ws_flags & WRDSF_DOOFFS) ? wsp->ws_offs : 0;
+  char **ptr;
+  size_t newalloc;
+
+  if (wsp->ws_wordv == NULL)
+    {
+      /* The first allocation must cover the reserved slots as well as the
+         words: sizing it by COUNT alone under-allocates when offs > 0. */
+      newalloc = offs + count > ALLOC_INIT ? offs + count : ALLOC_INIT;
+      ptr = calloc (newalloc, sizeof (ptr[0]));
+    }
+  else if (wsp->ws_wordn < offs + wsp->ws_wordc + count)
+    {
+      /* Grow by at least ALLOC_INCR entries to amortize reallocations. */
+      newalloc = offs + wsp->ws_wordc +
+	(count > ALLOC_INCR ? count : ALLOC_INCR);
+      ptr = realloc (wsp->ws_wordv, newalloc * sizeof (ptr[0]));
+    }
+  else
+    return 0;			/* enough room already */
+
+  if (ptr)
+    {
+      wsp->ws_wordn = newalloc;
+      wsp->ws_wordv = ptr;
+    }
+  else
+    return _wsplt_nomem (wsp);
+  return 0;
+}
+
+
+/* Node state flags */
+#define _WSNF_NULL     0x01	/* null node (a noop) */
+#define _WSNF_WORD     0x02	/* node contains word in v.word */
+#define _WSNF_QUOTE    0x04	/* text is quoted */
+#define _WSNF_NOEXPAND 0x08	/* text is not subject to expansion */
+#define _WSNF_JOIN     0x10	/* node must be joined with the next node */
+#define _WSNF_SEXP     0x20	/* is a sed expression */
+
+#define _WSNF_EMPTYOK  0x0100	/* special flag indicating that
+				   wordsplit_add_segm must add the
+				   segment even if it is empty */
+
+struct wordsplit_node
+{
+  struct wordsplit_node *prev;	/* Previous element */
+  struct wordsplit_node *next;	/* Next element */
+  int flags;			/* Node flags */
+  union
+  {
+    struct
+    {
+      size_t beg;		/* Start of word in ws_input */
+      size_t end;		/* End of word in ws_input */
+    } segm;
+    char *word;
+  } v;
+};
+
+static const char *
+wsnode_flagstr (int flags)
+{
+  static char retbuf[6];
+  char *p = retbuf;
+
+  if (flags & _WSNF_WORD)
+    *p++ = 'w';
+  else if (flags & _WSNF_NULL)
+    *p++ = 'n';
+  else
+    *p++ = '-';
+  if (flags & _WSNF_QUOTE)
+    *p++ = 'q';
+  else
+    *p++ = '-';
+  if (flags & _WSNF_NOEXPAND)
+    *p++ = 'E';
+  else
+    *p++ = '-';
+  if (flags & _WSNF_JOIN)
+    *p++ = 'j';
+  else
+    *p++ = '-';
+  if (flags & _WSNF_SEXP)
+    *p++ = 's';
+  else
+    *p++ = '-';
+  *p = 0;
+  return retbuf;
+}
+
+static const char *
+wsnode_ptr (struct wordsplit *wsp, struct wordsplit_node *p)
+{
+  if (p->flags & _WSNF_NULL)
+    return "";
+  else if (p->flags & _WSNF_WORD)
+    return p->v.word;
+  else
+    return wsp->ws_input + p->v.segm.beg;
+}
+
+static size_t
+wsnode_len (struct wordsplit_node *p)
+{
+  if (p->flags & _WSNF_NULL)
+    return 0;
+  else if (p->flags & _WSNF_WORD)
+    return strlen (p->v.word);
+  else
+    return p->v.segm.end - p->v.segm.beg;
+}
+
+static int
+wsnode_new (struct wordsplit *wsp, struct wordsplit_node **pnode)
+{
+  struct wordsplit_node *node = calloc (1, sizeof (*node));
+  if (!node)
+    return _wsplt_nomem (wsp);
+  *pnode = node;
+  return 0;
+}
+
+static void
+wsnode_free (struct wordsplit_node *p)
+{
+  if (p->flags & _WSNF_WORD)
+    free (p->v.word);
+  free (p);
+}
+
+static void
+wsnode_append (struct wordsplit *wsp, struct wordsplit_node *node)
+{
+  node->next = NULL;
+  node->prev = wsp->ws_tail;
+  if (wsp->ws_tail)
+    wsp->ws_tail->next = node;
+  else
+    wsp->ws_head = node;
+  wsp->ws_tail = node;
+}
+
+static void
+wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node)
+{
+  struct wordsplit_node *p;
+
+  p = node->prev;
+  if (p)
+    {
+      p->next = node->next;
+      if (!node->next)
+	p->flags &= ~_WSNF_JOIN;
+    }
+  else
+    wsp->ws_head = node->next;
+
+  p = node->next;
+  if (p)
+    p->prev = node->prev;
+  else
+    wsp->ws_tail = node->prev;
+
+  node->next = node->prev = NULL;
+}
+
+static void
+wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node,
+	       struct wordsplit_node *anchor, int before)
+{
+  if (!wsp->ws_head)
+    {
+      node->next = node->prev = NULL;
+      wsp->ws_head = wsp->ws_tail = node;
+    }
+  else if (before)
+    {
+      if (anchor->prev)
+	wsnode_insert (wsp, node, anchor->prev, 0);
+      else
+	{
+	  node->prev = NULL;
+	  node->next = anchor;
+	  anchor->prev = node;
+	  wsp->ws_head = node;
+	}
+    }
+  else
+    {
+      struct wordsplit_node *p;
+
+      p = anchor->next;
+      if (p)
+	p->prev = node;
+      else
+	wsp->ws_tail = node;
+      node->next = p;
+      node->prev = anchor;
+      anchor->next = node;
+    }
+}
+
+static int
+wordsplit_add_segm (struct wordsplit *wsp, size_t beg, size_t end, int flg)
+{
+  struct wordsplit_node *node;
+  int rc;
+
+  if (end == beg && !(flg & _WSNF_EMPTYOK))
+    return 0;
+  rc = wsnode_new (wsp, &node);
+  if (rc)
+    return rc;
+  node->flags = flg & ~(_WSNF_WORD | _WSNF_EMPTYOK);
+  node->v.segm.beg = beg;
+  node->v.segm.end = end;
+  wsnode_append (wsp, node);
+  return 0;
+}
+
+static void
+wordsplit_free_nodes (struct wordsplit *wsp)
+{
+  struct wordsplit_node *p;
+
+  for (p = wsp->ws_head; p;)
+    {
+      struct wordsplit_node *next = p->next;
+      wsnode_free (p);
+      p = next;
+    }
+  wsp->ws_head = wsp->ws_tail = NULL;
+}
+
+static void
+wordsplit_dump_nodes (struct wordsplit *wsp)
+{
+  struct wordsplit_node *p;
+  int n = 0;
+
+  for (p = wsp->ws_head, n = 0; p; p = p->next, n++)
+    {
+      if (p->flags & _WSNF_WORD)
+	wsp->ws_debug ("%4d: %p: %#04x (%s):%s;",
+		       n, p, p->flags, wsnode_flagstr (p->flags), p->v.word);
+      else
+	wsp->ws_debug ("%4d: %p: %#04x (%s):%.*s;",
+		       n, p, p->flags, wsnode_flagstr (p->flags),
+		       (int) (p->v.segm.end - p->v.segm.beg),
+		       wsp->ws_input + p->v.segm.beg);
+    }
+}
+
+/* Merge NODE with the run of nodes that follows it: every node carrying
+   _WSNF_JOIN plus the first node after them that does not.  The combined
+   text is stored in NODE as a freshly allocated word (_WSNF_WORD set,
+   _WSNF_JOIN cleared); the absorbed nodes are unlinked from the list and
+   freed.  Returns 0 on success or the result of _wsplt_nomem() if the
+   buffer cannot be allocated. */
+static int
+coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
+{
+  struct wordsplit_node *p, *end;
+  size_t len = 0;
+  char *buf, *cur;
+  int stop;
+
+  /* First pass: total length of the run and its last node. */
+  for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next)
+    {
+      len += wsnode_len (p);
+    }
+  if (p)
+    len += wsnode_len (p);
+  end = p;
+
+  buf = malloc (len + 1);
+  if (!buf)
+    return _wsplt_nomem (wsp);
+  cur = buf;
+
+  /* Second pass: copy the pieces and drop every node except NODE.
+     END is NULL when the run extends to the tail of the list, so the
+     loop must also stop when the list is exhausted instead of
+     dereferencing a null pointer. */
+  p = node;
+  for (stop = 0; p && !stop;)
+    {
+      struct wordsplit_node *next = p->next;
+      const char *str = wsnode_ptr (wsp, p);
+      size_t slen = wsnode_len (p);
+
+      memcpy (cur, str, slen);
+      cur += slen;
+      if (p != node)
+	{
+	  wsnode_remove (wsp, p);
+	  stop = p == end;
+	  wsnode_free (p);
+	}
+      p = next;
+    }
+
+  *cur = 0;
+
+  node->flags &= ~_WSNF_JOIN;
+
+  /* Release a previously owned word before installing the new one. */
+  if (node->flags & _WSNF_WORD)
+    free (node->v.word);
+  else
+    node->flags |= _WSNF_WORD;
+  node->v.word = buf;
+  return 0;
+}
+
+static int
+wsnode_quoteremoval (struct wordsplit *wsp)
+{
+  struct wordsplit_node *p;
+  void (*uqfn) (char *, const char *, size_t) =
+    (wsp->ws_flags & WRDSF_CESCAPES) ?
+    wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy;
+
+  for (p = wsp->ws_head; p; p = p->next)
+    {
+      const char *str = wsnode_ptr (wsp, p);
+      size_t slen = wsnode_len (p);
+      int unquote;
+
+      if (wsp->ws_flags & WRDSF_QUOTE)
+	{
+	  unquote = !(p->flags & _WSNF_NOEXPAND);
+	}
+      else
+	unquote = 0;
+
+      if (unquote)
+	{
+	  if (!(p->flags & _WSNF_WORD))
+	    {
+	      char *newstr = malloc (slen + 1);
+	      if (!newstr)
+		return _wsplt_nomem (wsp);
+	      memcpy (newstr, str, slen);
+	      newstr[slen] = 0;
+	      p->v.word = newstr;
+	      p->flags |= _WSNF_WORD;
+	    }
+
+	  if (wsp->ws_flags & WRDSF_ESCAPE)
+	    wordsplit_general_unquote_copy (p->v.word, str, slen,
+				            wsp->ws_escape);
+	  else
+	    uqfn (p->v.word, str, slen);
+	}
+    }
+  return 0;
+}
+
+static int
+wsnode_coalesce (struct wordsplit *wsp)
+{
+  struct wordsplit_node *p;
+
+  for (p = wsp->ws_head; p; p = p->next)
+    {
+      if (p->flags & _WSNF_JOIN)
+	if (coalesce_segment (wsp, p))
+	  return 1;
+    }
+  return 0;
+}
+
+static int
+wordsplit_finish (struct wordsplit *wsp)
+{
+  struct wordsplit_node *p;
+  size_t n;
+
+  n = 0;
+
+  for (p = wsp->ws_head; p; p = p->next)
+    n++;
+
+  if (alloc_space (wsp, n + 1))
+    return 1;
+
+  for (p = wsp->ws_head; p; p = p->next)
+    {
+      const char *str = wsnode_ptr (wsp, p);
+      size_t slen = wsnode_len (p);
+      char *newstr = malloc (slen + 1);
+
+      /* Assign newstr first, even if it is NULL.  This way
+         wordsplit_free will work even if we return
+         nomem later. */
+      wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = newstr;
+      if (!newstr)
+	return _wsplt_nomem (wsp);
+      memcpy (newstr, str, slen);
+      newstr[slen] = 0;
+
+      wsp->ws_wordc++;
+
+    }
+  wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
+  return 0;
+}
+
+
+/* Variable expansion */
+static int
+node_split_prefix (struct wordsplit *wsp,
+		   struct wordsplit_node **ptail,
+		   struct wordsplit_node *node,
+		   size_t beg, size_t len, int flg)
+{
+  struct wordsplit_node *newnode;
+
+  if (len == 0)
+    return 0;
+  if (wsnode_new (wsp, &newnode))
+    return 1;
+  wsnode_insert (wsp, newnode, *ptail, 0);
+  if (node->flags & _WSNF_WORD)
+    {
+      const char *str = wsnode_ptr (wsp, node);
+      char *newstr = malloc (len + 1);
+      if (!newstr)
+	return _wsplt_nomem (wsp);
+      memcpy (newstr, str + beg, len);
+      newstr[len] = 0;
+      newnode->flags = _WSNF_WORD;
+      newnode->v.word = newstr;
+    }
+  else
+    {
+      newnode->v.segm.beg = node->v.segm.beg + beg;
+      newnode->v.segm.end = newnode->v.segm.beg + len;
+    }
+  newnode->flags |= flg;
+  *ptail = newnode;
+  return 0;
+}
+
+static int
+find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff)
+{
+  enum
+  { st_init, st_squote, st_dquote } state = st_init;
+  size_t level = 1;
+
+  for (; i < len; i++)
+    {
+      switch (state)
+	{
+	case st_init:
+	  switch (str[i])
+	    {
+	    case '{':
+	      level++;
+	      break;
+
+	    case '}':
+	      if (--level == 0)
+		{
+		  *poff = i;
+		  return 0;
+		}
+	      break;
+
+	    case '"':
+	      state = st_dquote;
+	      break;
+
+	    case '\'':
+	      state = st_squote;
+	      break;
+	    }
+	  break;
+
+	case st_squote:
+	  if (str[i] == '\'')
+	    state = st_init;
+	  break;
+
+	case st_dquote:
+	  if (str[i] == '\\')
+	    i++;
+	  else if (str[i] == '"')
+	    state = st_init;
+	  break;
+	}
+    }
+  return 1;
+}
+
+static const char *
+wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len)
+{
+  size_t i;
+
+  if (!(wsp->ws_flags & WRDSF_ENV))
+    return NULL;
+
+  if (wsp->ws_flags & WRDSF_ENV_KV)
+    {
+      /* A key-value pair environment */
+      for (i = 0; wsp->ws_env[i]; i++)
+	{
+	  size_t elen = strlen (wsp->ws_env[i]);
+	  if (elen == len && memcmp (wsp->ws_env[i], name, elen
author	Sergey Poznyakoff <gray@gnu.org.ua>	2014-02-07 19:40:07 +0200
committer	Sergey Poznyakoff <gray@gnu.org.ua>	2014-02-07 19:40:07 +0200
commit	4be79061e8f68f6e3174a05452d96f31e8062464 (patch)
tree	c93924dc62e3f71f76cf29f98cb1c8d164e228a8 /src
parent	c9b5abe560c2fe06368cb733df8bcbfdb33a8526 (diff)
download	cflow-4be79061e8f68f6e3174a05452d96f31e8062464.tar.gz cflow-4be79061e8f68f6e3174a05452d96f31e8062464.tar.bz2