diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2011-04-18 22:16:48 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2011-04-18 22:16:48 +0300 |
commit | ec709eaca89cfb0f13c340439f9e3ea33e85bc22 (patch) | |
tree | ac4e9d608ab94a8cb4b5fc5182499fef83a1ee67 /src | |
parent | fdecabd82197944d317824ee7006ffb82aeb20cc (diff) | |
download | grecs-ec709eaca89cfb0f13c340439f9e3ea33e85bc22.tar.gz grecs-ec709eaca89cfb0f13c340439f9e3ea33e85bc22.tar.bz2 |
Remove superfluous gnulib modules. Improve wordsplit.
* gnulib.modules: Remove c-type, error, regex and stdbool.
* src/grecs-gram.y: Use 0/1 instead of false/true.
* src/preproc.c: Likewise.
* src/grecs.h: Don't include stdbool.h.
* src/wordsplit.c: Apply fixes from Mailutils.
* src/wordsplit.h: Likewise.
Diffstat (limited to 'src')
-rw-r--r-- | src/grecs-gram.y | 18 | ||||
-rw-r--r-- | src/grecs.h | 1 | ||||
-rw-r--r-- | src/preproc.c | 2 | ||||
-rw-r--r-- | src/wordsplit.c | 1348 | ||||
-rw-r--r-- | src/wordsplit.h | 109 |
5 files changed, 1236 insertions, 242 deletions
diff --git a/src/grecs-gram.y b/src/grecs-gram.y index 664737c..0fe8443 100644 --- a/src/grecs-gram.y +++ b/src/grecs-gram.y @@ -62,7 +62,7 @@ static void stmt_end (struct grecs_keyword *kwp); static struct grecs_keyword *find_keyword (const char *ident); static void process_ident (struct grecs_keyword *kwp, grecs_value_t *value); -static gl_list_t simple_list_create (bool dispose); +static gl_list_t simple_list_create (int dispose); %} %union { @@ -144,7 +144,7 @@ vallist : vlist vlist : value { - $$ = simple_list_create (false); + $$ = simple_list_create (0); gl_list_add_last ($$, grecs_value_dup (&$1)); } | vlist value @@ -191,7 +191,7 @@ slist : slist0 slist0 : QSTRING { - $$ = simple_list_create (false); + $$ = simple_list_create (0); gl_list_add_last ($$, $1); } | slist0 QSTRING @@ -217,7 +217,7 @@ list : '(' ')' values : value { - $$ = simple_list_create (false); + $$ = simple_list_create (0); gl_list_add_last ($$, grecs_value_dup (&$1)); } | values ',' value @@ -247,13 +247,13 @@ listel_dispose(const void *el) } static gl_list_t -simple_list_create (bool dispose) +simple_list_create (int dispose) { return gl_list_create_empty(&gl_linked_list_implementation, NULL, NULL, dispose ? 
listel_dispose : NULL, - false); + 0); } @@ -356,7 +356,7 @@ stmt_begin (struct grecs_keyword *kwp, grecs_value_t tag) void *target; if (!sections) - sections = simple_list_create (false); + sections = simple_list_create (0); gl_list_add_first (sections, cursect); if (kwp) { @@ -815,7 +815,7 @@ grecs_process_ident (struct grecs_keyword *kwp, grecs_value_t *value, grecs_prop_tab[type].eqfn, NULL, NULL, - false); + 0); while (gl_list_iterator_next (&itr, &p, NULL)) { @@ -868,7 +868,7 @@ grecs_process_ident (struct grecs_keyword *kwp, grecs_value_t *value, grecs_prop_tab[type].eqfn, NULL, listel_dispose, - false); + 0); if (type == grecs_type_string) gl_list_add_last (list, value->v.string); else diff --git a/src/grecs.h b/src/grecs.h index 1f17ac2..3fc01f2 100644 --- a/src/grecs.h +++ b/src/grecs.h @@ -18,7 +18,6 @@ #include <unistd.h> #include <stdlib.h> #include <stdio.h> -#include <stdbool.h> #include <gl_xlist.h> #include <gl_linked_list.h> diff --git a/src/preproc.c b/src/preproc.c index c0a8a1e..0723a2e 100644 --- a/src/preproc.c +++ b/src/preproc.c @@ -42,7 +42,7 @@ # define _(msgid) msgid #endif -bool grecs_log_to_stderr = true; +bool grecs_log_to_stderr = 1; void (*grecs_log_setup_hook) () = NULL; struct input_file_ident diff --git a/src/wordsplit.c b/src/wordsplit.c index 54904a6..b58ccb7 100644 --- a/src/wordsplit.c +++ b/src/wordsplit.c @@ -18,62 +18,126 @@ # include <config.h> #endif -#include <ctype.h> -#include <c-ctype.h> #include <errno.h> +#include <ctype.h> #include <unistd.h> #include <stdlib.h> #include <string.h> #include <stdio.h> -#include <wordsplit.h> +#include <stdarg.h> -#include <error.h> #include <gettext.h> #define _(msgid) gettext (msgid) -#include <xalloc.h> -#define isws(c) ((c)==' '||(c)=='\t'||(c)=='\n') -#define isdelim(c,delim) (strchr(delim,(c))!=NULL) - -#define _ARGCV_WORD_SED_EXPR 0x10000 -#define _ARGCV_WORD_MASK 0xf0000 +#define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n') +#define ISDELIM(ws,c) \ + (strchr ((ws)->ws_delim, 
(c)) != NULL) +#define ISPUNCT(c) (strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL) +#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z') +#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z') +#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c)) +#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9') +#define ISXDIGIT(c) (strchr("abcdefABCDEF", c)!=NULL) +#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c)) +#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127) #define ALLOC_INIT 128 #define ALLOC_INCR 128 +static void +_wsplt_alloc_die (struct wordsplit *wsp) +{ + wsp->ws_error (_("memory exhausted")); + abort (); +} + +static void +_wsplt_error (const char *fmt, ...) +{ + va_list ap; + + va_start (ap, fmt); + vfprintf (stderr, fmt, ap); + va_end (ap); + fputc ('\n', stderr); +} + +static void wordsplit_free_nodes (struct wordsplit *); + +static int +_wsplt_nomem (struct wordsplit *wsp) +{ + errno = ENOMEM; + wsp->ws_errno = WRDSE_NOSPACE; + if (wsp->ws_flags & WRDSF_ENOMEMABRT) + wsp->ws_alloc_die (wsp); + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); + if (!(wsp->ws_flags & WRDSF_REUSE)) + wordsplit_free (wsp); + wordsplit_free_nodes (wsp); + return wsp->ws_errno; +} + static int wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, - int flags) + int flags) { wsp->ws_flags = flags; - if ((wsp->ws_flags & (WRDSF_NOVAR|WRDSF_NOCMD)) - != (WRDSF_NOVAR|WRDSF_NOCMD)) + + if (!(wsp->ws_flags & WRDSF_ALLOC_DIE)) + wsp->ws_alloc_die = _wsplt_alloc_die; + if (!(wsp->ws_flags & WRDSF_ERROR)) + wsp->ws_error = _wsplt_error; + + if (!(wsp->ws_flags & WRDSF_NOVAR) + && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR))) { + errno = EINVAL; + wsp->ws_errno = WRDSE_USAGE; if (wsp->ws_flags & WRDSF_SHOWERR) - error (0, 0, - _("variable expansion and command substitution " - "are not yet supported")); + wordsplit_perror (wsp); + return wsp->ws_errno; + } + + if (!(wsp->ws_flags & 
WRDSF_NOCMD)) + { errno = EINVAL; - return 1; + wsp->ws_errno = WRDSE_NOSUPP; + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); + return wsp->ws_errno; + } + + if (wsp->ws_flags & WRDSF_SHOWDBG) + { + if (!(wsp->ws_flags & WRDSF_DEBUG)) + { + if (wsp->ws_flags & WRDSF_ERROR) + wsp->ws_debug = wsp->ws_error; + else if (wsp->ws_flags & WRDSF_SHOWERR) + wsp->ws_debug = _wsplt_error; + else + wsp->ws_flags &= ~WRDSF_SHOWDBG; + } } wsp->ws_input = input; wsp->ws_len = len; - + if (!(wsp->ws_flags & WRDSF_DOOFFS)) wsp->ws_offs = 0; - + if (!(wsp->ws_flags & WRDSF_DELIM)) - wsp->ws_delim = " "; - + wsp->ws_delim = " \t\n"; + if (!(wsp->ws_flags & WRDSF_COMMENT)) wsp->ws_comment = NULL; - + if (wsp->ws_flags & WRDSF_REUSE) { - wsp->ws_wordn = wsp->ws_wordc + 1; if (!(wsp->ws_flags & WRDSF_APPEND)) - wsp->ws_wordc = 0; + wordsplit_free_words (wsp); } else { @@ -81,71 +145,838 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, wsp->ws_wordc = 0; wsp->ws_wordn = 0; } - if (wsp->ws_flags & WRDSF_DOOFFS) - wsp->ws_wordn += wsp->ws_offs; + + if (!(wsp->ws_flags & WRDSF_CLOSURE)) + wsp->ws_closure = NULL; wsp->ws_endp = 0; + wsp->ws_errno = 0; + wsp->ws_head = wsp->ws_tail = NULL; return 0; } static int -alloc_space (struct wordsplit *wsp) +alloc_space (struct wordsplit *wsp, size_t count) { size_t offs = (wsp->ws_flags & WRDSF_DOOFFS) ? wsp->ws_offs : 0; char **ptr; size_t newalloc; - + if (wsp->ws_wordv == NULL) { - newalloc = offs + ALLOC_INIT; + newalloc = offs + count > ALLOC_INIT ? count : ALLOC_INIT; ptr = calloc (newalloc, sizeof (ptr[0])); } - else if (wsp->ws_wordn < offs + wsp->ws_wordc + 1) + else if (wsp->ws_wordn < offs + wsp->ws_wordc + count) { - newalloc = offs + wsp->ws_wordc + ALLOC_INCR; + newalloc = offs + wsp->ws_wordc + + count > ALLOC_INCR ? 
count : ALLOC_INCR; ptr = realloc (wsp->ws_wordv, newalloc * sizeof (ptr[0])); } else return 0; - + if (ptr) { wsp->ws_wordn = newalloc; wsp->ws_wordv = ptr; } else + return _wsplt_nomem (wsp); + return 0; +} + + +/* Node state flags */ +#define _WSNF_NULL 0x01 /* null node (a noop) */ +#define _WSNF_WORD 0x02 /* node contains word in v.word */ +#define _WSNF_QUOTE 0x04 /* text is quoted */ +#define _WSNF_NOEXPAND 0x08 /* text is not subject to expansion */ +#define _WSNF_JOIN 0x10 /* node must be joined with the next node */ +#define _WSNF_SEXP 0x20 /* is a sed expression */ + +#define _WSNF_EMPTYOK 0x0100 /* special flag indicating that + wordsplit_add_segm must add the + segment even if it is empty */ + +struct wordsplit_node +{ + struct wordsplit_node *prev; /* Previous element */ + struct wordsplit_node *next; /* Next element */ + int flags; /* Node flags */ + union + { + struct + { + size_t beg; /* Start of word in ws_input */ + size_t end; /* End of word in ws_input */ + } segm; + char *word; + } v; +}; + +static const char * +wsnode_flagstr (int flags) +{ + static char retbuf[6]; + char *p = retbuf; + + if (flags & _WSNF_WORD) + *p++ = 'w'; + else if (flags & _WSNF_NULL) + *p++ = 'n'; + else + *p++ = '-'; + if (flags & _WSNF_QUOTE) + *p++ = 'q'; + else + *p++ = '-'; + if (flags & _WSNF_NOEXPAND) + *p++ = 'E'; + else + *p++ = '-'; + if (flags & _WSNF_JOIN) + *p++ = 'j'; + else + *p++ = '-'; + if (flags & _WSNF_SEXP) + *p++ = 's'; + else + *p++ = '-'; + *p = 0; + return retbuf; +} + +static const char * +wsnode_ptr (struct wordsplit *wsp, struct wordsplit_node *p) +{ + if (p->flags & _WSNF_NULL) + return ""; + else if (p->flags & _WSNF_WORD) + return p->v.word; + else + return wsp->ws_input + p->v.segm.beg; +} + +static size_t +wsnode_len (struct wordsplit_node *p) +{ + if (p->flags & _WSNF_NULL) + return 0; + else if (p->flags & _WSNF_WORD) + return strlen (p->v.word); + else + return p->v.segm.end - p->v.segm.beg; +} + +static int +wsnode_new (struct 
wordsplit *wsp, struct wordsplit_node **pnode) +{ + struct wordsplit_node *node = calloc (1, sizeof (*node)); + if (!node) + return _wsplt_nomem (wsp); + *pnode = node; + return 0; +} + +static void +wsnode_free (struct wordsplit_node *p) +{ + if (p->flags & _WSNF_WORD) + free (p->v.word); + free (p); +} + +static void +wsnode_append (struct wordsplit *wsp, struct wordsplit_node *node) +{ + node->next = NULL; + node->prev = wsp->ws_tail; + if (wsp->ws_tail) + wsp->ws_tail->next = node; + else + wsp->ws_head = node; + wsp->ws_tail = node; +} + +static void +wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node) +{ + struct wordsplit_node *p; + + p = node->prev; + if (p) + { + p->next = node->next; + if (!node->next) + p->flags &= ~_WSNF_JOIN; + } + else + wsp->ws_head = node->next; + + p = node->next; + if (p) + p->prev = node->prev; + else + wsp->ws_tail = node->prev; + + node->next = node->prev = NULL; +} + +static void +wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node, + struct wordsplit_node *anchor, int before) +{ + if (!wsp->ws_head) + { + node->next = node->prev = NULL; + wsp->ws_head = wsp->ws_tail = node; + } + else if (before) + { + if (anchor->prev) + wsnode_insert (wsp, node, anchor->prev, 0); + else + { + node->prev = NULL; + node->next = anchor; + anchor->prev = node; + wsp->ws_head = node; + } + } + else + { + struct wordsplit_node *p; + + p = anchor->next; + if (p) + p->prev = node; + else + wsp->ws_tail = node; + node->next = p; + node->prev = anchor; + anchor->next = node; + } +} + +static int +wordsplit_add_segm (struct wordsplit *wsp, size_t beg, size_t end, + int flg) +{ + struct wordsplit_node *node; + int rc; + + if (end == beg && !(flg & _WSNF_EMPTYOK)) + return 0; + rc = wsnode_new (wsp, &node); + if (rc) + return rc; + node->flags = flg & ~(_WSNF_WORD|_WSNF_EMPTYOK); + node->v.segm.beg = beg; + node->v.segm.end = end; + wsnode_append (wsp, node); + return 0; +} + +static void +wordsplit_free_nodes (struct 
wordsplit *wsp) +{ + struct wordsplit_node *p; + + for (p = wsp->ws_head; p;) + { + struct wordsplit_node *next = p->next; + wsnode_free (p); + p = next; + } + wsp->ws_head = wsp->ws_tail = NULL; +} + +static void +wordsplit_dump_nodes (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + int n = 0; + + for (p = wsp->ws_head, n = 0; p; p = p->next, n++) + { + if (p->flags & _WSNF_WORD) + wsp->ws_debug ("%4d: %p: %#04x (%s):%s;", + n, p, p->flags, wsnode_flagstr (p->flags), p->v.word); + else + wsp->ws_debug ("%4d: %p: %#04x (%s):%.*s;", + n, p, p->flags, wsnode_flagstr (p->flags), + (int)(p->v.segm.end - p->v.segm.beg), + wsp->ws_input + p->v.segm.beg); + } +} + +static int +coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node) +{ + struct wordsplit_node *p, *end; + size_t len = 0; + char *buf, *cur; + int stop; + + for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next) + { + len += wsnode_len (p); + } + len += wsnode_len (p); + end = p; + + buf = malloc (len + 1); + if (!buf) + return _wsplt_nomem (wsp); + cur = buf; + + p = node; + for (stop = 0; !stop;) + { + struct wordsplit_node *next = p->next; + const char *str = wsnode_ptr (wsp, p); + size_t slen = wsnode_len (p); + + memcpy (cur, str, slen); + cur += slen; + if (p != node) + { + wsnode_remove (wsp, p); + stop = p == end; + wsnode_free (p); + } + p = next; + } + + *cur = 0; + + node->flags &= ~_WSNF_JOIN; + + if (node->flags & _WSNF_WORD) + free (node->v.word); + else + node->flags |= _WSNF_WORD; + node->v.word = buf; + return 0; +} + +static int +wsnode_quoteremoval (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + void (*uqfn) (char *, const char *, size_t) = + (wsp->ws_flags & WRDSF_CESCAPES) ? 
+ wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy; + + for (p = wsp->ws_head; p; p = p->next) + { + const char *str = wsnode_ptr (wsp, p); + size_t slen = wsnode_len (p); + int unquote; + + if (wsp->ws_flags & WRDSF_QUOTE) + { + unquote = !(p->flags & _WSNF_NOEXPAND); + } + else + unquote = 0; + + if (unquote) + { + if (!(p->flags & _WSNF_WORD)) + { + char *newstr = malloc (slen + 1); + if (!newstr) + return _wsplt_nomem (wsp); + memcpy (newstr, str, slen); + newstr[slen] = 0; + p->v.word = newstr; + p->flags |= _WSNF_WORD; + } + uqfn (p->v.word, str, slen); + } + } + return 0; +} + +static int +wsnode_coalesce (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + + for (p = wsp->ws_head; p; p = p->next) + { + if (p->flags & _WSNF_JOIN) + if (coalesce_segment (wsp, p)) + return 1; + } + return 0; +} + +static int +wordsplit_finish (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + size_t n; + + n = 0; + + for (p = wsp->ws_head; p; p = p->next) + n++; + + if (alloc_space (wsp, n + 1)) + return 1; + + for (p = wsp->ws_head; p; p = p->next) + { + const char *str = wsnode_ptr (wsp, p); + size_t slen = wsnode_len (p); + char *newstr = malloc (slen + 1); + + /* Assign newstr first, even if it is NULL. This way + wordsplit_free will work even if we return + nomem later. 
*/ + wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = newstr; + if (!newstr) + return _wsplt_nomem (wsp); + memcpy (newstr, str, slen); + newstr[slen] = 0; + + wsp->ws_wordc++; + + } + wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL; + return 0; +} + + +/* Variable expansion */ +static int +node_split_prefix (struct wordsplit *wsp, + struct wordsplit_node **ptail, + struct wordsplit_node *node, + size_t beg, size_t len, int flg) +{ + struct wordsplit_node *newnode; + + if (len == 0) + return 0; + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + if (node->flags & _WSNF_WORD) + { + const char *str = wsnode_ptr (wsp, node); + char *newstr = malloc (len + 1); + if (!newstr) + return _wsplt_nomem (wsp); + memcpy (newstr, str + beg, len); + newstr[len] = 0; + newnode->flags = _WSNF_WORD; + newnode->v.word = newstr; + } + else + { + newnode->v.segm.beg = node->v.segm.beg + beg; + newnode->v.segm.end = newnode->v.segm.beg + len; + } + newnode->flags |= flg; + *ptail = newnode; + return 0; +} + +static int +find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff) +{ + enum + { st_init, st_squote, st_dquote } state = st_init; + size_t level = 1; + + for (; i < len; i++) { - if (wsp->ws_flags & WRDSF_ENOMEMABRT) - xalloc_die (); - else if (wsp->ws_flags & WRDSF_SHOWERR) - error (0, 0, _("memory exhausted")); - errno = ENOMEM; + switch (state) + { + case st_init: + switch (str[i]) + { + case '{': + level++; + break; + + case '}': + if (--level == 0) + { + *poff = i; + return 0; + } + break; + + case '"': + state = st_dquote; + break; + + case '\'': + state = st_squote; + break; + } + break; + + case st_squote: + if (str[i] == '\'') + state = st_init; + break; + + case st_dquote: + if (str[i] == '\\') + i++; + else if (str[i] == '"') + state = st_init; + break; + } + } + return 1; +} + +static const char * +wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len) +{ + size_t i; + + if (!(wsp->ws_flags & 
WRDSF_ENV)) + return NULL; + + if (wsp->ws_flags & WRDSF_ENV_KV) + { + /* A key-value pair environment */ + for (i = 0; wsp->ws_env[i]; i++) + { + size_t elen = strlen (wsp->ws_env[i]); + if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0) + return wsp->ws_env[i + 1]; + /* Skip the value. Break the loop if it is NULL. */ + i++; + if (wsp->ws_env[i] == NULL) + break; + } + } + else + { + /* Usual (A=B) environment. */ + for (i = 0; wsp->ws_env[i]; i++) + { + size_t j; + const char *var = wsp->ws_env[i]; + + for (j = 0; j < len; j++) + if (name[j] != var[j]) + break; + if (j == len && var[j] == '=') + return var + j + 1; + } + } + return NULL; +} + +static int +expvar (struct wordsplit *wsp, const char *str, size_t len, + struct wordsplit_node **ptail, const char **pend, int flg) +{ + size_t i = 0; + const char *defstr = NULL; + const char *value; + const char *vptr; + struct wordsplit_node *newnode; + const char *start = str - 1; + + if (ISALPHA (str[0]) || str[0] == '_') + { + for (i = 1; i < len; i++) + if (!(ISALNUM (str[i]) || str[i] == '_')) + break; + *pend = str + i - 1; + } + else if (str[0] == '{') + { + str++; + len--; + for (i = 1; i < len; i++) + if (str[i] == '}' || str[i] == ':') + break; + if (str[i] == ':') + { + size_t j; + + defstr = str + i + 1; + if (find_closing_cbrace (str, i + 1, len, &j)) + { + wsp->ws_errno = WRDSE_CBRACE; + return 1; + } + *pend = str + j; + } + else if (str[i] == '}') + { + defstr = NULL; + *pend = str + i; + } + else + { + wsp->ws_errno = WRDSE_CBRACE; + return 1; + } + } + else + { + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_WORD | flg; + newnode->v.word = malloc (3); + if (!newnode->v.word) + return _wsplt_nomem (wsp); + newnode->v.word[0] = '$'; + newnode->v.word[1] = str[0]; + newnode->v.word[2] = 0; + *pend = str; + return 0; + } + + /* Actually expand the variable */ + /* str - start of the variable name + i - its 
length + defstr - default replacement str */ + + vptr = wordsplit_find_env (wsp, str, i); + if (vptr) + { + value = strdup (vptr); + if (!value) + return _wsplt_nomem (wsp); + } + else if (wsp->ws_flags & WRDSF_GETVAR) + value = wsp->ws_getvar (str, i, wsp->ws_closure); + else if (wsp->ws_flags & WRDSF_UNDEF) + { + wsp->ws_errno = WRDSE_UNDEF; + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); return 1; } + else + { + if (wsp->ws_flags & WRDSF_WARNUNDEF) + wsp->ws_error (_("warning: undefined variable `%.*s'"), (int) i, str); + if (wsp->ws_flags & WRDSF_KEEPUNDEF) + value = NULL; + else + value = ""; + } + /* FIXME: handle defstr */ + if (value) + { + if (flg & _WSNF_QUOTE) + { + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg; + newnode->v.word = strdup (value); + if (!newnode->v.word) + return _wsplt_nomem (wsp); + } + else if (*value == 0) + { + /* Empty string is a special case */ + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_NULL; + } + else + { + struct wordsplit ws; + int i; + + ws.ws_delim = wsp->ws_delim; + if (wordsplit (value, &ws, + WRDSF_NOVAR | WRDSF_NOCMD | + WRDSF_DELIM | WRDSF_SQUEEZE_DELIMS)) + { + wordsplit_free (&ws); + return 1; + } + for (i = 0; i < ws.ws_wordc; i++) + { + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_WORD | + _WSNF_NOEXPAND | + (i + 1 < ws.ws_wordc ? 
(flg & ~_WSNF_JOIN) : flg); + newnode->v.word = strdup (ws.ws_wordv[i]); + if (!newnode->v.word) + return _wsplt_nomem (wsp); + } + wordsplit_free (&ws); + } + } + else if (wsp->ws_flags & WRDSF_KEEPUNDEF) + { + size_t size = *pend - start + 1; + + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg; + newnode->v.word = malloc (size + 1); + if (!newnode->v.word) + return _wsplt_nomem (wsp); + memcpy (newnode->v.word, start, size); + newnode->v.word[size] = 0; + } + else + { + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_NULL; + } + return 0; +} + +static int +node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node) +{ + const char *str = wsnode_ptr (wsp, node); + size_t slen = wsnode_len (node); + const char *end = str + slen; + const char *p; + size_t off = 0; + struct wordsplit_node *tail = node; + + for (p = str; p < end; p++) + { + if (*p == '\\') + { + p++; + continue; + } + if (*p == '$') + { + size_t n = p - str; + + if (tail != node) + tail->flags |= _WSNF_JOIN; + if (node_split_prefix (wsp, &tail, node, off, n, _WSNF_JOIN)) + return 1; + p++; + if (expvar (wsp, p, slen - n, &tail, &p, + node->flags & (_WSNF_JOIN | _WSNF_QUOTE))) + return 1; + off += p - str + 1; + str = p + 1; + } + } + if (p > str) + { + if (tail != node) + tail->flags |= _WSNF_JOIN; + if (node_split_prefix (wsp, &tail, node, off, p - str, + node->flags & _WSNF_JOIN)) + return 1; + } + if (tail != node) + { + wsnode_remove (wsp, node); + wsnode_free (node); + } return 0; } static int -skip_sed_expr(const char *command, size_t i, size_t len) +wordsplit_varexp (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + + for (p = wsp->ws_head; p;) + { + struct wordsplit_node *next = p->next; + if (!(p->flags & _WSNF_NOEXPAND)) + if (node_expand_vars (wsp, p)) + return 1; + p = next; + } + 
+ /* Remove NULL lists */ + for (p = wsp->ws_head; p;) + { + struct wordsplit_node *next = p->next; + if (p->flags & _WSNF_NULL) + { + wsnode_remove (wsp, p); + wsnode_free (p); + } + p = next; + } + + return 0; +} + +/* Strip off any leading and trailing whitespace. This function is called + right after the initial scanning, therefore it assumes that every + node in the list is a text reference node. */ +static void +wordsplit_trimws (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + + for (p = wsp->ws_head; p; p = p->next) + { + size_t n; + + if (p->flags & _WSNF_QUOTE) + continue; + + /* Skip leading whitespace: */ + for (n = p->v.segm.beg; n < p->v.segm.end && ISWS (wsp->ws_input[n]); + n++) + ; + p->v.segm.beg = n; + /* Trim trailing whitespace */ + for (n = p->v.segm.end; n > p->v.segm.beg && ISWS (wsp->ws_input[n-1]); + n--); + p->v.segm.end = n; + } +} + +static int +skip_sed_expr (const char *command, size_t i, size_t len) { int state; - + do { int delim; if (command[i] == ';') i++; - if (!(command[i] == 's' && i + 3 < len && c_ispunct(command[i+1]))) + if (!(command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1]))) break; - + delim = command[++i]; state = 1; for (i++; i < len; i++) { if (state == 3) { - if (command[i] == delim || !c_isalnum(command[i])) + if (command[i] == delim || !ISALNUM (command[i])) break; } else if (command[i] == '\\') @@ -167,52 +998,77 @@ skip_delim (struct wordsplit *wsp) do start++; while (start < wsp->ws_len - && isdelim (wsp->ws_input[start], wsp->ws_delim)); + && ISDELIM (wsp, wsp->ws_input[start])); start--; } - + if (!(wsp->ws_flags & WRDSF_RETURN_DELIMS)) start++; - + return start; } -#define _WRDS_WORD 1 -#define _WRDS_CONT 2 +#define _WRDS_EOF 0 +#define _WRDS_OK 1 +#define _WRDS_ERR 2 + +static int +scan_qstring (struct wordsplit *wsp, size_t start, size_t * end) +{ + size_t j; + const char *command = wsp->ws_input; + size_t len = wsp->ws_len; + char q = command[start]; + + for (j = start + 1; j < len && 
command[j] != q; j++) + if (q == '"' && command[j] == '\\') + j++; + if (j < len && command[j] == q) + { + int flags = _WSNF_QUOTE|_WSNF_EMPTYOK; + if (q == '\'') + flags |= _WSNF_NOEXPAND; + if (wordsplit_add_segm (wsp, start + 1, j, flags)) + return _WRDS_ERR; + *end = j; + } + else + { + wsp->ws_endp = start; + wsp->ws_errno = WRDSE_QUOTE; + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); + return _WRDS_ERR; + } + return 0; +} static int -scan_word (struct wordsplit *wsp, size_t *pstart, size_t *pend) +scan_word (struct wordsplit *wsp, size_t start) { - size_t start = *pstart; size_t len = wsp->ws_len; const char *command = wsp->ws_input; - const char *delim = wsp->ws_delim; const char *comment = wsp->ws_comment; - + int join = 0; + int flags = 0; + size_t i = start; if (i >= len) - return WRDSE_EOF; - - if (wsp->ws_flags & WRDSF_WS) { - /* Skip initial whitespace */ - while (isws (command[i])) - if (++i == len) - return WRDSE_EOF; + wsp->ws_errno = WRDSE_EOF; + return _WRDS_EOF; } start = i; - - wsp->ws_flags &= ~_ARGCV_WORD_MASK; - + if (wsp->ws_flags & WRDSF_SED_EXPR - && command[i] == 's' && i + 3 < len && c_ispunct (command[i+1])) + && command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1])) { - wsp->ws_flags |= _ARGCV_WORD_SED_EXPR; + flags = _WSNF_SEXP; i = skip_sed_expr (command, i, len); } - else if (!isdelim (command[i], delim)) + else if (!ISDELIM (wsp, command[i])) { while (i < len) { @@ -221,12 +1077,12 @@ scan_word (struct wordsplit *wsp, size_t *pstart, size_t *pend) size_t j; for (j = i + 1; j < len && command[j] != '\n'; j++) ; - *pstart = start; - *pend = i; + if (wordsplit_add_segm (wsp, start, i, 0)) |