author | Sergey Poznyakoff <gray@gnu.org.ua> | 2014-02-07 17:40:07 (GMT) |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2014-02-07 17:40:07 (GMT) |
commit | 4be79061e8f68f6e3174a05452d96f31e8062464 (patch) (side-by-side diff) | |
tree | c93924dc62e3f71f76cf29f98cb1c8d164e228a8 /src | |
parent | c9b5abe560c2fe06368cb733df8bcbfdb33a8526 (diff) | |
download | cflow-4be79061e8f68f6e3174a05452d96f31e8062464.tar.gz cflow-4be79061e8f68f6e3174a05452d96f31e8062464.tar.bz2 |
Use wordsplit.[ch] (from grecs) instead of the obsolete argcv.[ch]
-rw-r--r-- | src/Makefile.am | 6 | ||||
-rw-r--r-- | src/argcv.c | 410 | ||||
-rw-r--r-- | src/argcv.h | 52 | ||||
-rw-r--r-- | src/rc.c | 42 | ||||
-rw-r--r-- | src/wordsplit.c | 1624 | ||||
-rw-r--r-- | src/wordsplit.h | 159 |
6 files changed, 1813 insertions, 480 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index bad0476..e7f0b85 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -18,8 +18,6 @@ bin_PROGRAMS = cflow cflow_SOURCES = \ - argcv.c\ - argcv.h\ c.l\ cflow.h\ depmap.c\ @@ -31,7 +29,9 @@ cflow_SOURCES = \ parser.h\ posix.c\ rc.c\ - symbol.c + symbol.c\ + wordsplit.c\ + wordsplit.h localedir = $(datadir)/locale diff --git a/src/argcv.c b/src/argcv.c deleted file mode 100644 index 611cb64..0000000 --- a/src/argcv.c +++ b/dev/null @@ -1,410 +0,0 @@ -/* argcv.c - simple functions for parsing input based on whitespace - Copyright (C) 1999, 2000, 2001, 2005, 2007 Free Software Foundation, Inc. - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. 
-*/ - -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif - -#include <ctype.h> -#include <errno.h> -#include <argcv.h> - -/* - * takes a string and splits it into several strings, breaking at ' ' - * command is the string to split - * the number of strings is placed into argc - * the split strings are put into argv - * returns 0 on success, nonzero on failure - */ - -#define isws(c) ((c)==' '||(c)=='\t'||(c)=='\n') -#define isdelim(c,delim) (strchr(delim,(c))!=NULL) - -static int -argcv_scan (int len, const char *command, const char *delim, const char* cmnt, - int *start, int *end, int *save) -{ - int i = 0; - - for (;;) - { - i = *save; - - if (i >= len) - return i + 1; - - /* Skip initial whitespace */ - while (i < len && isws (command[i])) - i++; - *start = i; - - if (!isdelim (command[i], delim)) - { - while (i < len) - { - if (command[i] == '\\') - { - if (++i == len) - break; - i++; - continue; - } - - if (command[i] == '\'' || command[i] == '"') - { - int j; - for (j = i+1; j < len && command[j] != command[i]; j++) - if (command[j] == '\\') - j++; - if (j < len) - i = j+1; - else - i++; - } - else if (isws (command[i]) || isdelim (command[i], delim)) - break; - else - i++; /* skip the escaped character */ - } - i--; - } - - *end = i; - *save = i + 1; - - /* If we have a token, and it starts with a comment character, skip - to the newline and restart the token search. 
*/ - if (*save <= len) - { - if (cmnt && strchr (cmnt, command[*start]) != NULL) - { - i = *save; - while (i < len && command[i] != '\n') - i++; - - *save = i; - continue; - } - } - break; - } - return *save; -} - -static char quote_transtab[] = "\\\\a\ab\bf\fn\nr\rt\t"; - -int -argcv_unquote_char (int c) -{ - char *p; - - for (p = quote_transtab; *p; p += 2) - { - if (*p == c) - return p[1]; - } - return c; -} - -int -argcv_quote_char (int c) -{ - char *p; - - for (p = quote_transtab + sizeof(quote_transtab) - 2; - p > quote_transtab; p -= 2) - { - if (*p == c) - return p[-1]; - } - return -1; -} - -#define to_num(c) \ - (isdigit(c) ? c - '0' : (isxdigit(c) ? toupper(c) - 'A' + 10 : 255 )) - -static int -xtonum (int *pval, const char *src, int base, int cnt) -{ - int i, val; - - for (i = 0, val = 0; i < cnt; i++, src++) - { - int n = *(unsigned char*)src; - if (n > 127 || (n = to_num(n)) >= base) - break; - val = val*base + n; - } - *pval = val; - return i; -} - -size_t -argcv_quoted_length (const char *str, int *quote) -{ - size_t len = 0; - - *quote = 0; - for (; *str; str++) - { - if (*str == ' ') - { - len++; - *quote = 1; - } - else if (*str == '"' || *str == '\'') - { - len += 2; - *quote = 1; - } - else if (*str != '\t' && *str != '\\' && isprint (*str)) - len++; - else if (argcv_quote_char (*str) != -1) - len += 2; - else - len += 4; - } - return len; -} - -void -argcv_unquote_copy (char *dst, const char *src, size_t n) -{ - int i = 0; - int c; - int expect_delim = 0; - - while (i < n) - { - switch (src[i]) - { - case '\'': - case '"': - if (!expect_delim) - { - const char *p; - - for (p = src+i+1; *p && *p != src[i]; p++) - if (*p == '\\') - p++; - if (*p) - expect_delim = src[i++]; - else - *dst++ = src[i++]; - } - else if (expect_delim == src[i]) - ++i; - else - *dst++ = src[i++]; - break; - - case '\\': - ++i; - if (src[i] == 'x' || src[i] == 'X') - { - if (n - i < 2) - { - *dst++ = '\\'; - *dst++ = src[i++]; - } - else - { - int off = xtonum(&c, src + 
i + 1, 16, 2); - if (off == 0) - { - *dst++ = '\\'; - *dst++ = src[i++]; - } - else - { - *dst++ = c; - i += off + 1; - } - } - } - else if ((unsigned char)src[i] < 128 && isdigit(src[i])) - { - if (n - i < 1) - { - *dst++ = '\\'; - *dst++ = src[i++]; - } - else - { - int off = xtonum(&c, src+i, 8, 3); - if (off == 0) - { - *dst++ = '\\'; - *dst++ = src[i++]; - } - else - { - *dst++ = c; - i += off; - } - } - } - else - *dst++ = argcv_unquote_char (src[i++]); - break; - - default: - *dst++ = src[i++]; - } - } - *dst = 0; -} - -void -argcv_quote_copy (char *dst, const char *src) -{ - for (; *src; src++) - { - if (*src == '"' || *src == '\'') - { - *dst++ = '\\'; - *dst++ = *src; - } - else if (*src != '\t' && *src != '\\' && isprint(*src)) - *dst++ = *src; - else - { - int c = argcv_quote_char (*src); - *dst++ = '\\'; - if (c != -1) - *dst++ = c; - else - { - char tmp[4]; - snprintf (tmp, sizeof tmp, "%03o", *(unsigned char*)src); - memcpy (dst, tmp, 3); - dst += 3; - } - } - } -} - -int -argcv_get (const char *command, const char *delim, const char *cmnt, - int *argc, char ***argv) -{ - int len = strlen (command); - int i = 0; - int start, end, save; - - *argv = NULL; - - /* Count number of arguments */ - *argc = 0; - save = 0; - - while (argcv_scan (len, command, delim, cmnt, &start, &end, &save) <= len) - (*argc)++; - - *argv = calloc ((*argc + 1), sizeof (char *)); - if (*argv == NULL) - return ENOMEM; - - i = 0; - save = 0; - for (i = 0; i < *argc; i++) - { - int n; - argcv_scan (len, command, delim, cmnt, &start, &end, &save); - - if ((command[start] == '"' || command[end] == '\'') - && command[end] == command[start]) - { - start++; - end--; - } - n = end - start + 1; - (*argv)[i] = calloc (n+1, sizeof (char)); - if ((*argv)[i] == NULL) - return ENOMEM; - argcv_unquote_copy ((*argv)[i], &command[start], n); - (*argv)[i][n] = 0; - } - (*argv)[i] = NULL; - return 0; -} - -/* - * frees all elements of an argv array - * argc is the number of elements - * argv is 
the array - */ -int -argcv_free (int argc, char **argv) -{ - while (--argc >= 0) - if (argv[argc]) - free (argv[argc]); - free (argv); - return 0; -} - -/* Take a argv an make string separated by ' '. */ - -int -argcv_string (int argc, char **argv, char **pstring) -{ - size_t i, j, len; - char *buffer; - - /* No need. */ - if (pstring == NULL) - return EINVAL; - - buffer = malloc (1); - if (buffer == NULL) - return ENOMEM; - *buffer = '\0'; - - for (len = i = j = 0; i < argc; i++) - { - int quote; - int toklen; - - toklen = argcv_quoted_length (argv[i], &quote); - - len += toklen + 2; - if (quote) - len += 2; - - buffer = realloc (buffer, len); - if (buffer == NULL) - return ENOMEM; - - if (i != 0) - buffer[j++] = ' '; - if (quote) - buffer[j++] = '"'; - argcv_quote_copy (buffer + j, argv[i]); - j += toklen; - if (quote) - buffer[j++] = '"'; - } - - for (; j > 0 && isspace (buffer[j-1]); j--) - ; - buffer[j] = 0; - if (pstring) - *pstring = buffer; - return 0; -} - diff --git a/src/argcv.h b/src/argcv.h deleted file mode 100644 index c082709..0000000 --- a/src/argcv.h +++ b/dev/null @@ -1,52 +0,0 @@ -/* GNU Mailutils -- a suite of utilities for electronic mail - Copyright (C) 1999, 2000, 2001, 2005, 2007 Free Software Foundation, Inc. - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ - -#ifndef _ARGCV_H -#define _ARGCV_H 1 - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <string.h> - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef __P -# if defined PROTOTYPES || (defined __STDC__ && __STDC__) -# define __P(args) args -# else -# define __P(args) () -# endif -#endif /*__P */ - -extern int argcv_get __P ((const char *command, const char *delim, - const char* cmnt, - int *argc, char ***argv)); -extern int argcv_string __P ((int argc, char **argv, char **string)); -extern int argcv_free __P ((int argc, char **argv)); -extern int argcv_unquote_char __P((int c)); -extern int argcv_quote_char __P((int c)); -extern size_t argcv_quoted_length __P((const char *str, int *quote)); -extern void argcv_unquote_copy __P((char *dst, const char *src, size_t n)); -extern void argcv_quote_copy __P((char *dst, const char *src)); - -#ifdef __cplusplus -} -#endif - -#endif /* _ARGCV_H */ @@ -18,7 +18,7 @@ #include <parser.h> #include <sys/stat.h> #include <ctype.h> -#include <argcv.h> +#include <wordsplit.h> #ifndef LOCAL_RC # define LOCAL_RC ".cflowrc" @@ -31,8 +31,9 @@ expand_argcv(int *argc_ptr, char ***argv_ptr, int argc, char **argv) *argv_ptr = xrealloc(*argv_ptr, (*argc_ptr + argc + 1) * sizeof **argv_ptr); - for (i = 0; i <= argc; i++) - (*argv_ptr)[*argc_ptr + i] = argv[i]; + for (i = 0; i < argc; i++) + (*argv_ptr)[*argc_ptr + i] = xstrdup(argv[i]); + (*argv_ptr)[*argc_ptr + i] = NULL; *argc_ptr += argc; } @@ -45,6 +46,9 @@ parse_rc(int *argc_ptr, char ***argv_ptr, char *name) FILE *rcfile; int size; char *buf, *p; + struct wordsplit ws; + int wsflags; + int line; if (stat(name, &st)) return; @@ -62,14 +66,19 @@ parse_rc(int *argc_ptr, char ***argv_ptr, char *name) buf[size] = 0; fclose(rcfile); + ws.ws_comment = "#"; + wsflags = WRDSF_DEFFLAGS | WRDSF_COMMENT; + line = 0; for (p = strtok(buf, "\n"); p; p = strtok(NULL, "\n")) { - int argc; - char **argv; - - argcv_get(p, "", "#", &argc, &argv); - expand_argcv(argc_ptr, 
argv_ptr, argc, argv); - free(argv); + ++line; + if (wordsplit(p, &ws, wsflags)) + error(1, 0, "%s:%d: %s", name, line, wordsplit_strerror(&ws)); + wsflags |= WRDSF_REUSE; + if (ws.ws_wordc) + expand_argcv(argc_ptr, argv_ptr, ws.ws_wordc, ws.ws_wordv); } + if (wsflags & WRDSF_REUSE) + wordsplit_free(&ws); free(buf); } @@ -94,12 +103,15 @@ sourcerc(int *argc_ptr, char ***argv_ptr) env = getenv("CFLOW_OPTIONS"); if (env) { - int argc; - char **argv; - - argcv_get(env, "", "#", &argc, &argv); - expand_argcv(&xargc, &xargv, argc, argv); - free(argv); + struct wordsplit ws; + + ws.ws_comment = "#"; + if (wordsplit(env, &ws, WRDSF_DEFFLAGS | WRDSF_COMMENT)) + error(1, 0, "failed to parse CFLOW_OPTIONS: %s", + wordsplit_strerror(&ws)); + if (ws.ws_wordc) + expand_argcv(&xargc, &xargv, ws.ws_wordc, ws.ws_wordv); + wordsplit_free(&ws); } env = getenv("CFLOWRC"); diff --git a/src/wordsplit.c b/src/wordsplit.c new file mode 100644 index 0000000..9047369 --- a/dev/null +++ b/src/wordsplit.c @@ -0,0 +1,1624 @@ +/* wordsplit - a word splitter + Copyright (C) 2009-2012 Sergey Poznyakoff + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3 of the License, or (at your + option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <errno.h> +#include <ctype.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <stdarg.h> + +#if ENABLE_NLS +# include <gettext.h> +#else +# define gettext(msgid) msgid +#endif +#define _(msgid) gettext (msgid) +#define N_(msgid) msgid + +#include <wordsplit.h> + +#define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n') +#define ISDELIM(ws,c) \ + (strchr ((ws)->ws_delim, (c)) != NULL) +#define ISPUNCT(c) (strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL) +#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z') +#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z') +#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c)) +#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9') +#define ISXDIGIT(c) (strchr("abcdefABCDEF", c)!=NULL) +#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c)) +#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127) + +#define ALLOC_INIT 128 +#define ALLOC_INCR 128 + +static void +_wsplt_alloc_die (struct wordsplit *wsp) +{ + wsp->ws_error (_("memory exhausted")); + abort (); +} + +static void +_wsplt_error (const char *fmt, ...) 
+{ + va_list ap; + + va_start (ap, fmt); + vfprintf (stderr, fmt, ap); + va_end (ap); + fputc ('\n', stderr); +} + +static void wordsplit_free_nodes (struct wordsplit *); + +static int +_wsplt_nomem (struct wordsplit *wsp) +{ + errno = ENOMEM; + wsp->ws_errno = WRDSE_NOSPACE; + if (wsp->ws_flags & WRDSF_ENOMEMABRT) + wsp->ws_alloc_die (wsp); + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); + if (!(wsp->ws_flags & WRDSF_REUSE)) + wordsplit_free (wsp); + wordsplit_free_nodes (wsp); + return wsp->ws_errno; +} + +static void +wordsplit_init0 (struct wordsplit *wsp) +{ + if (wsp->ws_flags & WRDSF_REUSE) + { + if (!(wsp->ws_flags & WRDSF_APPEND)) + wordsplit_free_words (wsp); + } + else + { + wsp->ws_wordv = NULL; + wsp->ws_wordc = 0; + wsp->ws_wordn = 0; + } + + wsp->ws_errno = 0; + wsp->ws_head = wsp->ws_tail = NULL; +} + +static int +wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, + int flags) +{ + wsp->ws_flags = flags; + + if (!(wsp->ws_flags & WRDSF_ALLOC_DIE)) + wsp->ws_alloc_die = _wsplt_alloc_die; + if (!(wsp->ws_flags & WRDSF_ERROR)) + wsp->ws_error = _wsplt_error; + + if (!(wsp->ws_flags & WRDSF_NOVAR) + && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR))) + { + errno = EINVAL; + wsp->ws_errno = WRDSE_USAGE; + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); + return wsp->ws_errno; + } + + if (!(wsp->ws_flags & WRDSF_NOCMD)) + { + errno = EINVAL; + wsp->ws_errno = WRDSE_NOSUPP; + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); + return wsp->ws_errno; + } + + if (wsp->ws_flags & WRDSF_SHOWDBG) + { + if (!(wsp->ws_flags & WRDSF_DEBUG)) + { + if (wsp->ws_flags & WRDSF_ERROR) + wsp->ws_debug = wsp->ws_error; + else if (wsp->ws_flags & WRDSF_SHOWERR) + wsp->ws_debug = _wsplt_error; + else + wsp->ws_flags &= ~WRDSF_SHOWDBG; + } + } + + wsp->ws_input = input; + wsp->ws_len = len; + + if (!(wsp->ws_flags & WRDSF_DOOFFS)) + wsp->ws_offs = 0; + + if (!(wsp->ws_flags & WRDSF_DELIM)) + wsp->ws_delim = " \t\n"; + + 
if (!(wsp->ws_flags & WRDSF_COMMENT)) + wsp->ws_comment = NULL; + + if (!(wsp->ws_flags & WRDSF_CLOSURE)) + wsp->ws_closure = NULL; + + wsp->ws_endp = 0; + + wordsplit_init0 (wsp); + + return 0; +} + +static int +alloc_space (struct wordsplit *wsp, size_t count) +{ + size_t offs = (wsp->ws_flags & WRDSF_DOOFFS) ? wsp->ws_offs : 0; + char **ptr; + size_t newalloc; + + if (wsp->ws_wordv == NULL) + { + newalloc = offs + count > ALLOC_INIT ? count : ALLOC_INIT; + ptr = calloc (newalloc, sizeof (ptr[0])); + } + else if (wsp->ws_wordn < offs + wsp->ws_wordc + count) + { + newalloc = offs + wsp->ws_wordc + + (count > ALLOC_INCR ? count : ALLOC_INCR); + ptr = realloc (wsp->ws_wordv, newalloc * sizeof (ptr[0])); + } + else + return 0; + + if (ptr) + { + wsp->ws_wordn = newalloc; + wsp->ws_wordv = ptr; + } + else + return _wsplt_nomem (wsp); + return 0; +} + + +/* Node state flags */ +#define _WSNF_NULL 0x01 /* null node (a noop) */ +#define _WSNF_WORD 0x02 /* node contains word in v.word */ +#define _WSNF_QUOTE 0x04 /* text is quoted */ +#define _WSNF_NOEXPAND 0x08 /* text is not subject to expansion */ +#define _WSNF_JOIN 0x10 /* node must be joined with the next node */ +#define _WSNF_SEXP 0x20 /* is a sed expression */ + +#define _WSNF_EMPTYOK 0x0100 /* special flag indicating that + wordsplit_add_segm must add the + segment even if it is empty */ + +struct wordsplit_node +{ + struct wordsplit_node *prev; /* Previous element */ + struct wordsplit_node *next; /* Next element */ + int flags; /* Node flags */ + union + { + struct + { + size_t beg; /* Start of word in ws_input */ + size_t end; /* End of word in ws_input */ + } segm; + char *word; + } v; +}; + +static const char * +wsnode_flagstr (int flags) +{ + static char retbuf[6]; + char *p = retbuf; + + if (flags & _WSNF_WORD) + *p++ = 'w'; + else if (flags & _WSNF_NULL) + *p++ = 'n'; + else + *p++ = '-'; + if (flags & _WSNF_QUOTE) + *p++ = 'q'; + else + *p++ = '-'; + if (flags & _WSNF_NOEXPAND) + *p++ = 'E'; + else + 
*p++ = '-'; + if (flags & _WSNF_JOIN) + *p++ = 'j'; + else + *p++ = '-'; + if (flags & _WSNF_SEXP) + *p++ = 's'; + else + *p++ = '-'; + *p = 0; + return retbuf; +} + +static const char * +wsnode_ptr (struct wordsplit *wsp, struct wordsplit_node *p) +{ + if (p->flags & _WSNF_NULL) + return ""; + else if (p->flags & _WSNF_WORD) + return p->v.word; + else + return wsp->ws_input + p->v.segm.beg; +} + +static size_t +wsnode_len (struct wordsplit_node *p) +{ + if (p->flags & _WSNF_NULL) + return 0; + else if (p->flags & _WSNF_WORD) + return strlen (p->v.word); + else + return p->v.segm.end - p->v.segm.beg; +} + +static int +wsnode_new (struct wordsplit *wsp, struct wordsplit_node **pnode) +{ + struct wordsplit_node *node = calloc (1, sizeof (*node)); + if (!node) + return _wsplt_nomem (wsp); + *pnode = node; + return 0; +} + +static void +wsnode_free (struct wordsplit_node *p) +{ + if (p->flags & _WSNF_WORD) + free (p->v.word); + free (p); +} + +static void +wsnode_append (struct wordsplit *wsp, struct wordsplit_node *node) +{ + node->next = NULL; + node->prev = wsp->ws_tail; + if (wsp->ws_tail) + wsp->ws_tail->next = node; + else + wsp->ws_head = node; + wsp->ws_tail = node; +} + +static void +wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node) +{ + struct wordsplit_node *p; + + p = node->prev; + if (p) + { + p->next = node->next; + if (!node->next) + p->flags &= ~_WSNF_JOIN; + } + else + wsp->ws_head = node->next; + + p = node->next; + if (p) + p->prev = node->prev; + else + wsp->ws_tail = node->prev; + + node->next = node->prev = NULL; +} + +static void +wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node, + struct wordsplit_node *anchor, int before) +{ + if (!wsp->ws_head) + { + node->next = node->prev = NULL; + wsp->ws_head = wsp->ws_tail = node; + } + else if (before) + { + if (anchor->prev) + wsnode_insert (wsp, node, anchor->prev, 0); + else + { + node->prev = NULL; + node->next = anchor; + anchor->prev = node; + wsp->ws_head = node; 
+ } + } + else + { + struct wordsplit_node *p; + + p = anchor->next; + if (p) + p->prev = node; + else + wsp->ws_tail = node; + node->next = p; + node->prev = anchor; + anchor->next = node; + } +} + +static int +wordsplit_add_segm (struct wordsplit *wsp, size_t beg, size_t end, int flg) +{ + struct wordsplit_node *node; + int rc; + + if (end == beg && !(flg & _WSNF_EMPTYOK)) + return 0; + rc = wsnode_new (wsp, &node); + if (rc) + return rc; + node->flags = flg & ~(_WSNF_WORD | _WSNF_EMPTYOK); + node->v.segm.beg = beg; + node->v.segm.end = end; + wsnode_append (wsp, node); + return 0; +} + +static void +wordsplit_free_nodes (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + + for (p = wsp->ws_head; p;) + { + struct wordsplit_node *next = p->next; + wsnode_free (p); + p = next; + } + wsp->ws_head = wsp->ws_tail = NULL; +} + +static void +wordsplit_dump_nodes (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + int n = 0; + + for (p = wsp->ws_head, n = 0; p; p = p->next, n++) + { + if (p->flags & _WSNF_WORD) + wsp->ws_debug ("%4d: %p: %#04x (%s):%s;", + n, p, p->flags, wsnode_flagstr (p->flags), p->v.word); + else + wsp->ws_debug ("%4d: %p: %#04x (%s):%.*s;", + n, p, p->flags, wsnode_flagstr (p->flags), + (int) (p->v.segm.end - p->v.segm.beg), + wsp->ws_input + p->v.segm.beg); + } +} + +static int +coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node) +{ + struct wordsplit_node *p, *end; + size_t len = 0; + char *buf, *cur; + int stop; + + for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next) + { + len += wsnode_len (p); + } + if (p) + len += wsnode_len (p); + end = p; + + buf = malloc (len + 1); + if (!buf) + return _wsplt_nomem (wsp); + cur = buf; + + p = node; + for (stop = 0; !stop;) + { + struct wordsplit_node *next = p->next; + const char *str = wsnode_ptr (wsp, p); + size_t slen = wsnode_len (p); + + memcpy (cur, str, slen); + cur += slen; + if (p != node) + { + wsnode_remove (wsp, p); + stop = p == end; + wsnode_free (p); + } + p 
= next; + } + + *cur = 0; + + node->flags &= ~_WSNF_JOIN; + + if (node->flags & _WSNF_WORD) + free (node->v.word); + else + node->flags |= _WSNF_WORD; + node->v.word = buf; + return 0; +} + +static int +wsnode_quoteremoval (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + void (*uqfn) (char *, const char *, size_t) = + (wsp->ws_flags & WRDSF_CESCAPES) ? + wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy; + + for (p = wsp->ws_head; p; p = p->next) + { + const char *str = wsnode_ptr (wsp, p); + size_t slen = wsnode_len (p); + int unquote; + + if (wsp->ws_flags & WRDSF_QUOTE) + { + unquote = !(p->flags & _WSNF_NOEXPAND); + } + else + unquote = 0; + + if (unquote) + { + if (!(p->flags & _WSNF_WORD)) + { + char *newstr = malloc (slen + 1); + if (!newstr) + return _wsplt_nomem (wsp); + memcpy (newstr, str, slen); + newstr[slen] = 0; + p->v.word = newstr; + p->flags |= _WSNF_WORD; + } + + if (wsp->ws_flags & WRDSF_ESCAPE) + wordsplit_general_unquote_copy (p->v.word, str, slen, + wsp->ws_escape); + else + uqfn (p->v.word, str, slen); + } + } + return 0; +} + +static int +wsnode_coalesce (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + + for (p = wsp->ws_head; p; p = p->next) + { + if (p->flags & _WSNF_JOIN) + if (coalesce_segment (wsp, p)) + return 1; + } + return 0; +} + +static int +wordsplit_finish (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + size_t n; + + n = 0; + + for (p = wsp->ws_head; p; p = p->next) + n++; + + if (alloc_space (wsp, n + 1)) + return 1; + + for (p = wsp->ws_head; p; p = p->next) + { + const char *str = wsnode_ptr (wsp, p); + size_t slen = wsnode_len (p); + char *newstr = malloc (slen + 1); + + /* Assign newstr first, even if it is NULL. This way + wordsplit_free will work even if we return + nomem later. 
*/ + wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = newstr; + if (!newstr) + return _wsplt_nomem (wsp); + memcpy (newstr, str, slen); + newstr[slen] = 0; + + wsp->ws_wordc++; + + } + wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL; + return 0; +} + + +/* Variable expansion */ +static int +node_split_prefix (struct wordsplit *wsp, + struct wordsplit_node **ptail, + struct wordsplit_node *node, + size_t beg, size_t len, int flg) +{ + struct wordsplit_node *newnode; + + if (len == 0) + return 0; + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + if (node->flags & _WSNF_WORD) + { + const char *str = wsnode_ptr (wsp, node); + char *newstr = malloc (len + 1); + if (!newstr) + return _wsplt_nomem (wsp); + memcpy (newstr, str + beg, len); + newstr[len] = 0; + newnode->flags = _WSNF_WORD; + newnode->v.word = newstr; + } + else + { + newnode->v.segm.beg = node->v.segm.beg + beg; + newnode->v.segm.end = newnode->v.segm.beg + len; + } + newnode->flags |= flg; + *ptail = newnode; + return 0; +} + +static int +find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff) +{ + enum + { st_init, st_squote, st_dquote } state = st_init; + size_t level = 1; + + for (; i < len; i++) + { + switch (state) + { + case st_init: + switch (str[i]) + { + case '{': + level++; + break; + + case '}': + if (--level == 0) + { + *poff = i; + return 0; + } + break; + + case '"': + state = st_dquote; + break; + + case '\'': + state = st_squote; + break; + } + break; + + case st_squote: + if (str[i] == '\'') + state = st_init; + break; + + case st_dquote: + if (str[i] == '\\') + i++; + else if (str[i] == '"') + state = st_init; + break; + } + } + return 1; +} + +static const char * +wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len) +{ + size_t i; + + if (!(wsp->ws_flags & WRDSF_ENV)) + return NULL; + + if (wsp->ws_flags & WRDSF_ENV_KV) + { + /* A key-value pair environment */ + for (i = 0; wsp->ws_env[i]; i++) + { + size_t 
elen = strlen (wsp->ws_env[i]); + if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0) + return wsp->ws_env[i + 1]; + /* Skip the value. Break the loop if it is NULL. */ + i++; + if (wsp->ws_env[i] == NULL) + break; + } + } + else + { + /* Usual (A=B) environment. */ + for (i = 0; wsp->ws_env[i]; i++) + { + size_t j; + const char *var = wsp->ws_env[i]; + + for (j = 0; j < len; j++) + if (name[j] != var[j]) + break; + if (j == len && var[j] == '=') + return var + j + 1; + } + } + return NULL; +} + +static int +expvar (struct wordsplit *wsp, const char *str, size_t len, + struct wordsplit_node **ptail, const char **pend, int flg) +{ + size_t i = 0; + const char *defstr = NULL; + const char *value; + const char *vptr; + struct wordsplit_node *newnode; + const char *start = str - 1; + + if (ISALPHA (str[0]) || str[0] == '_') + { + for (i = 1; i < len; i++) + if (!(ISALNUM (str[i]) || str[i] == '_')) + break; + *pend = str + i - 1; + } + else if (str[0] == '{') + { + str++; + len--; + for (i = 1; i < len; i++) + if (str[i] == '}' || str[i] == ':') + break; + if (str[i] == ':') + { + size_t j; + + defstr = str + i + 1; + if (find_closing_cbrace (str, i + 1, len, &j)) + { + wsp->ws_errno = WRDSE_CBRACE; + return 1; + } + *pend = str + j; + } + else if (str[i] == '}') + { + defstr = NULL; + *pend = str + i; + } + else + { + wsp->ws_errno = WRDSE_CBRACE; + return 1; + } + } + else + { + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_WORD | flg; + newnode->v.word = malloc (3); + if (!newnode->v.word) + return _wsplt_nomem (wsp); + newnode->v.word[0] = '$'; + newnode->v.word[1] = str[0]; + newnode->v.word[2] = 0; + *pend = str; + return 0; + } + + /* Actually expand the variable */ + /* str - start of the variable name + i - its length + defstr - default replacement str */ + + vptr = wordsplit_find_env (wsp, str, i); + if (vptr) + { + value = strdup (vptr); + if (!value) + return 
_wsplt_nomem (wsp); + } + else if (wsp->ws_flags & WRDSF_GETVAR) + value = wsp->ws_getvar (str, i, wsp->ws_closure); + else if (wsp->ws_flags & WRDSF_UNDEF) + { + wsp->ws_errno = WRDSE_UNDEF; + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); + return 1; + } + else + { + if (wsp->ws_flags & WRDSF_WARNUNDEF) + wsp->ws_error (_("warning: undefined variable `%.*s'"), (int) i, str); + if (wsp->ws_flags & WRDSF_KEEPUNDEF) + value = NULL; + else + value = ""; + } + /* FIXME: handle defstr */ + if (value) + { + if (flg & _WSNF_QUOTE) + { + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg; + newnode->v.word = strdup (value); + if (!newnode->v.word) + return _wsplt_nomem (wsp); + } + else if (*value == 0) + { + /* Empty string is a special case */ + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_NULL; + } + else + { + struct wordsplit ws; + int i; + + ws.ws_delim = wsp->ws_delim; + if (wordsplit (value, &ws, + WRDSF_NOVAR | WRDSF_NOCMD | WRDSF_DELIM | WRDSF_WS)) + { + wordsplit_free (&ws); + return 1; + } + for (i = 0; i < ws.ws_wordc; i++) + { + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_WORD | + _WSNF_NOEXPAND | + (i + 1 < ws.ws_wordc ? 
(flg & ~_WSNF_JOIN) : flg); + newnode->v.word = strdup (ws.ws_wordv[i]); + if (!newnode->v.word) + return _wsplt_nomem (wsp); + } + wordsplit_free (&ws); + } + } + else if (wsp->ws_flags & WRDSF_KEEPUNDEF) + { + size_t size = *pend - start + 1; + + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg; + newnode->v.word = malloc (size + 1); + if (!newnode->v.word) + return _wsplt_nomem (wsp); + memcpy (newnode->v.word, start, size); + newnode->v.word[size] = 0; + } + else + { + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_NULL; + } + return 0; +} + +static int +node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node) +{ + const char *str = wsnode_ptr (wsp, node); + size_t slen = wsnode_len (node); + const char *end = str + slen; + const char *p; + size_t off = 0; + struct wordsplit_node *tail = node; + + for (p = str; p < end; p++) + { + if (*p == '\\') + { + p++; + continue; + } + if (*p == '$') + { + size_t n = p - str; + + if (tail != node) + tail->flags |= _WSNF_JOIN; + if (node_split_prefix (wsp, &tail, node, off, n, _WSNF_JOIN)) + return 1; + p++; + if (expvar (wsp, p, slen - n, &tail, &p, + node->flags & (_WSNF_JOIN | _WSNF_QUOTE))) + return 1; + off += p - str + 1; + str = p + 1; + } + } + if (p > str) + { + if (tail != node) + tail->flags |= _WSNF_JOIN; + if (node_split_prefix (wsp, &tail, node, off, p - str, + node->flags & _WSNF_JOIN)) + return 1; + } + if (tail != node) + { + wsnode_remove (wsp, node); + wsnode_free (node); + } + return 0; +} + +/* Remove NULL lists */ +static void +wsnode_nullelim (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + + for (p = wsp->ws_head; p;) + { + struct wordsplit_node *next = p->next; + if (p->flags & _WSNF_NULL) + { + wsnode_remove (wsp, p); + wsnode_free (p); + } + p = next; + } +} + +static int 
+wordsplit_varexp (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + + for (p = wsp->ws_head; p;) + { + struct wordsplit_node *next = p->next; + if (!(p->flags & _WSNF_NOEXPAND)) + if (node_expand_vars (wsp, p)) + return 1; + p = next; + } + + wsnode_nullelim (wsp); + return 0; +} + +/* Strip off any leading and trailing whitespace. This function is called + right after the initial scanning, therefore it assumes that every + node in the list is a text reference node. */ +static void +wordsplit_trimws (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + + for (p = wsp->ws_head; p; p = p->next) + { + size_t n; + + if (p->flags & _WSNF_QUOTE) + continue; + + /* Skip leading whitespace: */ + for (n = p->v.segm.beg; n < p->v.segm.end && ISWS (wsp->ws_input[n]); + n++) + ; + p->v.segm.beg = n; + /* Trim trailing whitespace */ + for (n = p->v.segm.end; + n > p->v.segm.beg && ISWS (wsp->ws_input[n - 1]); n--); + p->v.segm.end = n; + if (p->v.segm.beg == p->v.segm.end) + p->flags |= _WSNF_NULL; + } + + wsnode_nullelim (wsp); +} + +static int +skip_sed_expr (const char *command, size_t i, size_t len) +{ + int state; + + do + { + int delim; + + if (command[i] == ';') + i++; + if (!(command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1]))) + break; + + delim = command[++i]; + state = 1; + for (i++; i < len; i++) + { + if (state == 3) + { + if (command[i] == delim || !ISALNUM (command[i])) + break; + } + else if (command[i] == '\\') + i++; + else if (command[i] == delim) + state++; + } + } + while (state == 3 && i < len && command[i] == ';'); + return i; +} + +static size_t +skip_delim (struct wordsplit *wsp) +{ + size_t start = wsp->ws_endp; + if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS) + { + if ((wsp->ws_flags & WRDSF_RETURN_DELIMS) && + ISDELIM (wsp, wsp->ws_input[start])) + { + int delim = wsp->ws_input[start]; + do + start++; + while (start < wsp->ws_len && delim == wsp->ws_input[start]); + } + else + { + do + start++; + while (start < wsp->ws_len && ISDELIM 
(wsp, wsp->ws_input[start])); + } + start--; + } + + if (!(wsp->ws_flags & WRDSF_RETURN_DELIMS)) + start++; + + return start; +} + +#define _WRDS_EOF 0 +#define _WRDS_OK 1 +#define _WRDS_ERR 2 + +static int +scan_qstring (struct wordsplit *wsp, size_t start, size_t * end) +{ + size_t j; + const char *command = wsp->ws_input; + size_t len = wsp->ws_len; + char q = command[start]; + + for (j = start + 1; j < len && command[j] != q; j++) + if (q == '"' && command[j] == '\\') + j++; + if (j < len && command[j] == q) + { + int flags = _WSNF_QUOTE | _WSNF_EMPTYOK; + if (q == '\'') + flags |= _WSNF_NOEXPAND; + if (wordsplit_add_segm (wsp, start + 1, j, flags)) + return _WRDS_ERR; + *end = j; + } + else + { + wsp->ws_endp = start; + wsp->ws_errno = WRDSE_QUOTE; + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); + return _WRDS_ERR; + } + return 0; +} + +static int +scan_word (struct wordsplit *wsp, size_t start) +{ + size_t len = wsp->ws_len; + const char *command = wsp->ws_input; + const char *comment = wsp->ws_comment; + int join = 0; + int flags = 0; + + size_t i = start; + + if (i >= len) + { + wsp->ws_errno = WRDSE_EOF; + return _WRDS_EOF; + } + + start = i; + + if (wsp->ws_flags & WRDSF_SED_EXPR + && command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1])) + { + flags = _WSNF_SEXP; + i = skip_sed_expr (command, i, len); + } + else if (!ISDELIM (wsp, command[i])) + { + while (i < len) + { + if (comment && strchr (comment, command[i]) != NULL) + { + size_t j; + for (j = i + 1; j < len && command[j] != '\n'; j++) + ; + if (wordsplit_add_segm (wsp, start, i, 0)) + return _WRDS_ERR; + wsp->ws_endp = j; + return _WRDS_OK; + } + + if (wsp->ws_flags & WRDSF_QUOTE) + { + if (command[i] == '\\') + { + if (++i == len) + break; + i++; + continue; + } + + if (((wsp->ws_flags & WRDSF_SQUOTE) && command[i] == '\'') || + ((wsp->ws_flags & WRDSF_DQUOTE) && command[i] == '"')) + { + if (join && wsp->ws_tail) + wsp->ws_tail->flags |= _WSNF_JOIN; + if (wordsplit_add_segm 
/* Translation table for C escapes.  It consists of character pairs: the
   letter that follows the backslash in the escape sequence, then the
   character the sequence stands for.  */
static char quote_transtab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";

/* Return the character denoted by the escape sequence \C.  If C is not
   listed in the translation table, return C itself.  */
int
wordsplit_c_unquote_char (int c)
{
  size_t i;

  for (i = 0; quote_transtab[i]; i += 2)
    {
      if (quote_transtab[i] == c)
	return quote_transtab[i + 1];
    }
  return c;
}

/* Return the letter which, when prefixed with a backslash, forms the C
   escape sequence for the character C.  Return -1 if the translation
   table has no escape for C.  */
int
wordsplit_c_quote_char (int c)
{
  size_t i;

  /* Walk the pairs from the last one to the first one.  I is the index
     just past the pair being examined; an index is used instead of a
     pointer so that nothing is ever computed before the start of the
     table.  sizeof counts the terminating NUL, hence the - 1.  */
  for (i = sizeof (quote_transtab) - 1; i >= 2; i -= 2)
    {
      if (quote_transtab[i - 1] == c)
	return quote_transtab[i - 2];
    }
  return -1;
}
/* Copy N bytes from SRC to DST, removing each backslash that is
   immediately followed by a character listed in ESCAPABLE.  A backslash
   followed by any other character, or one that is the last of the N
   bytes, is copied verbatim.  Bytes at SRC[N] and beyond are never
   examined.  DST is NUL-terminated; it must have room for N + 1
   bytes.  */
void
wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
				const char *escapable)
{
  size_t i;

  for (i = 0; i < n;)
    {
      /* The bound is on I + 1 because it is SRC[I + 1] that gets
	 inspected here.  */
      if (src[i] == '\\' && i + 1 < n && strchr (escapable, src[i + 1]))
	i++;
      *dst++ = src[i++];
    }
  *dst = 0;
}

/* Copy N bytes from SRC to DST, removing every backslash and taking the
   character that follows it literally.  A backslash that is the last of
   the N bytes has nothing to protect and is dropped; this is what the
   result looked like for NUL-terminated input before, but SRC[N] is no
   longer read.  DST is NUL-terminated; it must have room for N + 1
   bytes.  */
void
wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n)
{
  size_t i = 0;

  while (i < n)
    {
      if (src[i] == '\\')
	{
	  i++;
	  if (i == n)
	    break;
	}
      *dst++ = src[i++];
    }
  *dst = 0;
}
/* Store in DST the quoted form of the NUL-terminated string SRC.

   A double quote is prefixed with a backslash.  Characters other than
   tab and backslash for which ISPRINT is true are copied as is.  Any
   other character is written as %XX (two upper-case hex digits) if
   QUOTE_HEX is non-zero; otherwise as a backslash followed either by
   the letter returned by wordsplit_c_quote_char() or, if that function
   returns -1, by three octal digits.

   No terminating NUL is stored in DST.  */
void
wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
{
  char *out = dst;
  const char *in;

  for (in = src; *in != 0; in++)
    {
      char numbuf[4];
      int letter;

      if (*in == '"')
	{
	  out[0] = '\\';
	  out[1] = *in;
	  out += 2;
	  continue;
	}

      if (*in != '\t' && *in != '\\' && ISPRINT (*in))
	{
	  *out++ = *in;
	  continue;
	}

      if (quote_hex)
	{
	  snprintf (numbuf, sizeof numbuf, "%%%02X", *(unsigned char *) in);
	  memcpy (out, numbuf, 3);
	  out += 3;
	  continue;
	}

      letter = wordsplit_c_quote_char (*in);
      *out++ = '\\';
      if (letter != -1)
	{
	  *out++ = letter;
	  continue;
	}

      snprintf (numbuf, sizeof numbuf, "%03o", *(unsigned char *) in);
      memcpy (out, numbuf, 3);
      out += 3;
    }
}
(wsnode_coalesce (wsp)) + break; + + if (wsp->ws_flags & WRDSF_SHOWDBG) + { + wsp->ws_debug ("Coalesced list:"); + wordsplit_dump_nodes (wsp); + } + } + while (0); + return wsp->ws_errno; +} + +int +wordsplit_len (const char *command, size_t length, struct wordsplit *wsp, + int flags) +{ + int rc; + size_t start; + const char *cmdptr; + size_t cmdlen; + + if (!command) + { + if (!(flags & WRDSF_INCREMENTAL)) + return EINVAL; + + start = skip_delim (wsp); + if (wsp->ws_endp == wsp->ws_len) + { + wsp->ws_errno = WRDSE_NOINPUT; + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); + return wsp->ws_errno; + } + + cmdptr = wsp->ws_input + wsp->ws_endp; + cmdlen = wsp->ws_len - wsp->ws_endp; + wsp->ws_flags |= WRDSF_REUSE; + wordsplit_init0 (wsp); + } + else + { + cmdptr = command; + cmdlen = length; + start = 0; + rc = wordsplit_init (wsp, cmdptr, cmdlen, flags); + if (rc) + return rc; + } + + if (wsp->ws_flags & WRDSF_SHOWDBG) + wsp->ws_debug ("Input:%.*s;", (int) cmdlen, cmdptr); + + rc = wordsplit_process_list (wsp, start); + if (rc == 0 && (flags & WRDSF_INCREMENTAL)) + { + while (!wsp->ws_head && wsp->ws_endp < wsp->ws_len) + { + start = skip_delim (wsp); + if (wsp->ws_flags & WRDSF_SHOWDBG) + { + cmdptr = wsp->ws_input + wsp->ws_endp; + cmdlen = wsp->ws_len - wsp->ws_endp; + wsp->ws_debug ("Restart:%.*s;", (int) cmdlen, cmdptr); + } + rc = wordsplit_process_list (wsp, start); + if (rc) + break; + } + } + if (rc) + { + wordsplit_free_nodes (wsp); + return rc; + } + wordsplit_finish (wsp); + wordsplit_free_nodes (wsp); + return wsp->ws_errno; +} + +int +wordsplit (const char *command, struct wordsplit *ws, int flags) +{ + return wordsplit_len (command, command ? 
strlen (command) : 0, ws, + flags); +} + +void +wordsplit_free_words (struct wordsplit *ws) +{ + size_t i; + + for (i = 0; i < ws->ws_wordc; i++) + { + char *p = ws->ws_wordv[ws->ws_offs + i]; + if (p) + { + free (p); + ws->ws_wordv[ws->ws_offs + i] = NULL; + } + } + ws->ws_wordc = 0; +} + +void +wordsplit_free (struct wordsplit *ws) +{ + wordsplit_free_words (ws); + free (ws->ws_wordv); + ws->ws_wordv = NULL; +} + +void +wordsplit_perror (struct wordsplit *wsp) +{ + switch (wsp->ws_errno) + { + case WRDSE_EOF: + wsp->ws_error (_("no error")); + break; + + case WRDSE_QUOTE: + wsp->ws_error (_("missing closing %c (start near #%lu)"), + wsp->ws_input[wsp->ws_endp], + (unsigned long) wsp->ws_endp); + break; + + case WRDSE_NOSPACE: + wsp->ws_error (_("memory exhausted")); + break; + + case WRDSE_NOSUPP: + wsp->ws_error (_("command substitution is not yet supported")); + break; + + case WRDSE_USAGE: + wsp->ws_error (_("invalid wordsplit usage")); + break; + + case WRDSE_CBRACE: + wsp->ws_error (_("unbalanced curly brace")); + break; + + case WRDSE_UNDEF: + wsp->ws_error (_("undefined variable")); + break; + + case WRDSE_NOINPUT: + wsp->ws_error (_("input exhausted")); + break; + + default: + wsp->ws_error (_("unknown error")); + } +} + +const char *_wordsplit_errstr[] = { + N_("no error"), + N_("missing closing quote"), + N_("memory exhausted"), + N_("command substitution is not yet supported"), + N_("invalid wordsplit usage"), + N_("unbalanced curly brace"), + N_("undefined variable"), + N_("input exhausted") +}; +int _wordsplit_nerrs = + sizeof (_wordsplit_errstr) / sizeof (_wordsplit_errstr[0]); + +const char * +wordsplit_strerror (struct wordsplit *ws) +{ + if (ws->ws_errno < _wordsplit_nerrs) + return _wordsplit_errstr[ws->ws_errno]; + return N_("unknown error"); +} diff --git a/src/wordsplit.h b/src/wordsplit.h new file mode 100644 index 0000000..35e125a --- a/dev/null +++ b/src/wordsplit.h @@ -0,0 +1,159 @@ +/* wordsplit - a word splitter + Copyright (C) 
2009-2012 Sergey Poznyakoff + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3 of the License, or (at your + option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef __WORDSPLIT_H +#define __WORDSPLIT_H + +#include <stddef.h> + +struct wordsplit +{ + size_t ws_wordc; + char **ws_wordv; + size_t ws_offs; + size_t ws_wordn; + int ws_flags; + const char *ws_delim; + const char *ws_comment; + const char *ws_escape; + void (*ws_alloc_die) (struct wordsplit * wsp); + void (*ws_error) (const char *, ...) + __attribute__ ((__format__ (__printf__, 1, 2))); + void (*ws_debug) (const char *, ...) + __attribute__ ((__format__ (__printf__, 1, 2))); + + const char **ws_env; + const char *(*ws_getvar) (const char *, size_t, void *); + void *ws_closure; + + const char *ws_input; + size_t ws_len; + size_t ws_endp; + int ws_errno; + struct wordsplit_node *ws_head, *ws_tail; +}; + +/* Wordsplit flags. Only 2 bits of a 32-bit word remain unused. + It is getting crowded... */ +/* Append the words found to the array resulting from a previous + call. */ +#define WRDSF_APPEND 0x00000001 +/* Insert ws_offs initial NULLs in the array ws_wordv. + (These are not counted in the returned ws_wordc.) */ +#define WRDSF_DOOFFS 0x00000002 +/* Don't do command substitution. Reserved for future use. */ +#define WRDSF_NOCMD 0x00000004 +/* The parameter p resulted from a previous call to + wordsplit(), and wordsplit_free() was not called. Reuse the + allocated storage. 
*/ +#define WRDSF_REUSE 0x00000008 +/* Print errors */ +#define WRDSF_SHOWERR 0x00000010 +/* Consider it an error if an undefined shell variable + is expanded. */ +#define WRDSF_UNDEF 0x00000020 + +/* Don't do variable expansion. */ +#define WRDSF_NOVAR 0x00000040 +/* Abort on ENOMEM error */ +#define WRDSF_ENOMEMABRT 0x00000080 +/* Trim off any leading and trailing whitespace */ +#define WRDSF_WS 0x00000100 +/* Handle single quotes */ +#define WRDSF_SQUOTE 0x00000200 +/* Handle double quotes */ +#define WRDSF_DQUOTE 0x00000400 +/* Handle quotes and escape directives */ +#define WRDSF_QUOTE (WRDSF_SQUOTE|WRDSF_DQUOTE) +/* Replace each input sequence of repeated delimiters with a single + delimiter */ +#define WRDSF_SQUEEZE_DELIMS 0x00000800 +/* Return delimiters */ +#define WRDSF_RETURN_DELIMS 0x00001000 +/* Treat sed expressions as words */ +#define WRDSF_SED_EXPR 0x00002000 +/* ws_delim field is initialized */ +#define WRDSF_DELIM 0x00004000 +/* ws_comment field is initialized */ +#define WRDSF_COMMENT 0x00008000 +/* ws_alloc_die field is initialized */ +#define WRDSF_ALLOC_DIE 0x00010000 +/* ws_error field is initialized */ +#define WRDSF_ERROR 0x00020000 +/* ws_debug field is initialized */ +#define WRDSF_DEBUG 0x00040000 +/* ws_env field is initialized */ +#define WRDSF_ENV 0x00080000 +/* ws_getvar field is initialized */ +#define WRDSF_GETVAR 0x00100000 +/* enable debugging */ +#define WRDSF_SHOWDBG 0x00200000 +/* Don't split input into words. Useful for side effects. */ +#define WRDSF_NOSPLIT 0x00400000 +/* Keep undefined variables in place, instead of expanding them to + empty string */ +#define WRDSF_KEEPUNDEF 0x00800000 +/* Warn about undefined variables */ +#define WRDSF_WARNUNDEF 0x01000000 +/* Handle C escapes */ +#define WRDSF_CESCAPES 0x02000000 + +/* ws_closure is set */ +#define WRDSF_CLOSURE 0x04000000 +/* ws_env is a Key/Value environment, i.e. the value of a variable is + stored in the element that follows its name. 
*/ +#define WRDSF_ENV_KV 0x08000000 + +/* ws_escape is set */ +#define WRDSF_ESCAPE 0x10000000 + +/* Incremental mode */ +#define WRDSF_INCREMENTAL 0x20000000 + +#define WRDSF_DEFFLAGS \ + (WRDSF_NOVAR | WRDSF_NOCMD | \ + WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_CESCAPES) + +#define WRDSE_EOF 0 +#define WRDSE_QUOTE 1 +#define WRDSE_NOSPACE 2 +#define WRDSE_NOSUPP 3 +#define WRDSE_USAGE 4 +#define WRDSE_CBRACE 5 +#define WRDSE_UNDEF 6 +#define WRDSE_NOINPUT 7 + +int wordsplit (const char *s, struct wordsplit *p, int flags); +int wordsplit_len (const char *s, size_t len, + struct wordsplit *p, int flags); +void wordsplit_free (struct wordsplit *p); +void wordsplit_free_words (struct wordsplit *ws); + +int wordsplit_c_unquote_char (int c); +int wordsplit_c_quote_char (int c); +size_t wordsplit_c_quoted_length (const char *str, int quote_hex, + int *quote); +void wordsplit_general_unquote_copy (char *dst, const char *src, size_t n, + const char *escapable); +void wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n); +void wordsplit_c_unquote_copy (char *dst, const char *src, size_t n); +void wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex); + +void wordsplit_perror (struct wordsplit *ws); +const char *wordsplit_strerror (struct wordsplit *ws); + + +#endif |