diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2018-12-08 12:03:40 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2018-12-08 12:03:40 +0200 |
commit | 602f4d93070ac0e762e0cbe3ef72ba792f9c4811 (patch) | |
tree | 8047168c6b23de38973b494b6ec794a81faf3576 /src/wordsplit.c | |
parent | 2a684f1cdd7723c2ded277ea2c7e66227b6f3ae1 (diff) | |
download | vmod-dbrw-602f4d93070ac0e762e0cbe3ef72ba792f9c4811.tar.gz vmod-dbrw-602f4d93070ac0e762e0cbe3ef72ba792f9c4811.tar.bz2 |
Implement the $(urlprefixes) built-in function.
* NEWS: Update.
* README: Update.
* configure.ac: Version 2.2.91
* doc/vmod-dbrw.3: Document the use of $(urlprefixes) built-in
* doc/vmod-dbrw.texi: Likewise.
* src/vmod_dbrw.c (parse_flags): Make sure status string is null-terminated.
(do_rewrite): Expand built-in functions in $(). Support urlprefixes.
On debug_level=100, produce detailed trace of expansions.
* src/wordsplit.c: Pull from grecs commit 9097d529.
* src/wordsplit.h: Likewise.
* tests/initdb.at (rewrite): Change the url column.
* tests/rewrite01.at: Use $(urlprefixes) in the SQL templates.
* tests/rewrite02.at: Likewise.
* tests/rewrite03.at: Likewise.
* tests/rewrite04.at: Likewise.
* tests/rewrite05.at: Likewise.
* tests/rewrite06.at: Likewise.
Diffstat (limited to 'src/wordsplit.c')
-rw-r--r-- | src/wordsplit.c | 1646 |
1 files changed, 1284 insertions, 362 deletions
diff --git a/src/wordsplit.c b/src/wordsplit.c index f4740bf..bad59b1 100644 --- a/src/wordsplit.c +++ b/src/wordsplit.c @@ -1,3 +1,3 @@ /* wordsplit - a word splitter - Copyright (C) 2009-2014 Sergey Poznyakoff + Copyright (C) 2009-2018 Sergey Poznyakoff @@ -27,2 +27,4 @@ #include <stdarg.h> +#include <pwd.h> +#include <glob.h> @@ -50,2 +52,8 @@ +#define ISVARBEG(c) (ISALPHA(c) || c == '_') +#define ISVARCHR(c) (ISALNUM(c) || c == '_') + +#define WSP_RETURN_DELIMS(wsp) \ + ((wsp)->ws_flags & WRDSF_RETURN_DELIMS || ((wsp)->ws_options & WRDSO_MAXWORDS)) + #define ALLOC_INIT 128 @@ -56,3 +64,3 @@ _wsplt_alloc_die (struct wordsplit *wsp) { - wsp->ws_error (_("memory exhausted")); + wsp->ws_error ("%s", _("memory exhausted")); abort (); @@ -60,3 +68,3 @@ _wsplt_alloc_die (struct wordsplit *wsp) -static void +static void _wsplt_error (const char *fmt, ...) @@ -74,2 +82,11 @@ static void wordsplit_free_nodes (struct wordsplit *); static int +_wsplt_seterr (struct wordsplit *wsp, int ec) +{ + wsp->ws_errno = ec; + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); + return ec; +} + +static int _wsplt_nomem (struct wordsplit *wsp) @@ -88,2 +105,80 @@ _wsplt_nomem (struct wordsplit *wsp) +static int wordsplit_run (const char *command, size_t length, + struct wordsplit *wsp, + int flags, int lvl); + +static int wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, + int flags); +static int wordsplit_process_list (struct wordsplit *wsp, size_t start); +static int wordsplit_finish (struct wordsplit *wsp); + +static int +_wsplt_subsplit (struct wordsplit *wsp, struct wordsplit *wss, + char const *str, int len, + int flags, int finalize) +{ + int rc; + + wss->ws_delim = wsp->ws_delim; + wss->ws_debug = wsp->ws_debug; + wss->ws_error = wsp->ws_error; + wss->ws_alloc_die = wsp->ws_alloc_die; + + if (!(flags & WRDSF_NOVAR)) + { + wss->ws_env = wsp->ws_env; + wss->ws_getvar = wsp->ws_getvar; + flags |= wsp->ws_flags & (WRDSF_ENV | WRDSF_ENV_KV | WRDSF_GETVAR); + } + if (!(flags & WRDSF_NOCMD)) + { + wss->ws_command = wsp->ws_command; + } + + if ((flags & (WRDSF_NOVAR|WRDSF_NOCMD)) != (WRDSF_NOVAR|WRDSF_NOCMD)) + { + wss->ws_closure = wsp->ws_closure; + flags |= wsp->ws_flags & WRDSF_CLOSURE; + } + + wss->ws_options = wsp->ws_options; + + flags |= WRDSF_DELIM + | WRDSF_ALLOC_DIE + | WRDSF_ERROR + | WRDSF_DEBUG + | (wsp->ws_flags & (WRDSF_SHOWDBG | WRDSF_SHOWERR | WRDSF_OPTIONS)); + + rc = wordsplit_init (wss, str, len, flags); + if (rc) + return rc; + wss->ws_lvl = wsp->ws_lvl + 1; + rc = wordsplit_process_list (wss, 0); + if (rc) + { + wordsplit_free_nodes (wss); + return rc; + } + if (finalize) + { + rc = wordsplit_finish (wss); + wordsplit_free_nodes (wss); + } + return rc; +} + +static void +_wsplt_seterr_sub (struct wordsplit *wsp, struct wordsplit *wss) +{ + if (wsp->ws_errno == WRDSE_USERERR) + free (wsp->ws_usererr); + wsp->ws_errno = wss->ws_errno; + if (wss->ws_errno == WRDSE_USERERR) + { + wsp->ws_usererr = wss->ws_usererr; + wss->ws_errno = WRDSE_EOF; + wss->ws_usererr = NULL; + } +} + static void @@ -95,2 +190,3 @@ wordsplit_init0 (struct wordsplit *wsp) wordsplit_free_words (wsp); + wordsplit_clearerr (wsp); } @@ -104,5 +200,6 @@ wordsplit_init0 (struct wordsplit *wsp) wsp->ws_errno = 0; - wsp->ws_head = wsp->ws_tail = NULL; } +char wordsplit_c_escape_tab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v"; + static int @@ -118,10 +215,7 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, - if (!(wsp->ws_flags & WRDSF_NOVAR) - && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR))) + if (!(wsp->ws_flags & WRDSF_NOVAR)) { - errno = EINVAL; - wsp->ws_errno = WRDSE_USAGE; - if (wsp->ws_flags & WRDSF_SHOWERR) - wordsplit_perror (wsp); - return wsp->ws_errno; + /* These will be initialized on first variable assignment */ + wsp->ws_envidx = wsp->ws_envsiz = 0; + wsp->ws_envbuf = NULL; } @@ -130,7 +224,8 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, { - errno = EINVAL; - wsp->ws_errno = WRDSE_NOSUPP; - if (wsp->ws_flags & WRDSF_SHOWERR) - wordsplit_perror (wsp); - return wsp->ws_errno; + if (!wsp->ws_command) + { + _wsplt_seterr (wsp, WRDSE_USAGE); + errno = EINVAL; + return wsp->ws_errno; + } } @@ -165,6 +260,38 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, + if (!(wsp->ws_flags & WRDSF_OPTIONS)) + wsp->ws_options = 0; + + if (wsp->ws_flags & WRDSF_ESCAPE) + { + if (!wsp->ws_escape[WRDSX_WORD]) + wsp->ws_escape[WRDSX_WORD] = ""; + if (!wsp->ws_escape[WRDSX_QUOTE]) + wsp->ws_escape[WRDSX_QUOTE] = ""; + } + else + { + if (wsp->ws_flags & WRDSF_CESCAPES) + { + wsp->ws_escape[WRDSX_WORD] = wordsplit_c_escape_tab; + wsp->ws_escape[WRDSX_QUOTE] = wordsplit_c_escape_tab; + wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD + | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD; + } + else + { + wsp->ws_escape[WRDSX_WORD] = ""; + wsp->ws_escape[WRDSX_QUOTE] = "\\\\\"\""; + wsp->ws_options |= WRDSO_BSKEEP_QUOTE; + } + } + wsp->ws_endp = 0; + wsp->ws_wordi = 0; + if (wsp->ws_flags & WRDSF_REUSE) + wordsplit_free_nodes (wsp); + wsp->ws_head = wsp->ws_tail = NULL; + wordsplit_init0 (wsp); - + return 0; @@ -211,2 +338,3 @@ alloc_space (struct wordsplit *wsp, size_t count) #define _WSNF_SEXP 0x20 /* is a sed expression */ +#define _WSNF_DELIM 0x40 /* node is a delimiter */ @@ -235,3 +363,3 @@ wsnode_flagstr (int flags) { - static char retbuf[6]; + static char retbuf[7]; char *p = retbuf; @@ -260,2 +388,6 @@ wsnode_flagstr (int flags) *p++ = '-'; + if (flags & _WSNF_DELIM) + *p++ = 'd'; + else + *p++ = '-'; *p = 0; @@ -340,2 +472,10 @@ wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node) +static struct wordsplit_node * +wsnode_tail (struct wordsplit_node *p) +{ + while (p && p->next) + p = p->next; + return p; +} + static void @@ -355,5 +495,6 @@ wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node, { + struct wordsplit_node *tail = wsnode_tail (node); node->prev = NULL; - node->next = anchor; - anchor->prev = node; + tail->next = anchor; + anchor->prev = tail; wsp->ws_head = node; @@ -364,2 +505,3 @@ wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node, struct wordsplit_node *p; + struct wordsplit_node *tail = wsnode_tail (node); @@ -367,6 +509,6 @@ wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node, if (p) - p->prev = node; + p->prev = tail; else - wsp->ws_tail = node; - node->next = p; + wsp->ws_tail = tail; + tail->next = p; node->prev = anchor; @@ -417,6 +559,8 @@ wordsplit_dump_nodes (struct wordsplit *wsp) if (p->flags & _WSNF_WORD) - wsp->ws_debug ("%4d: %p: %#04x (%s):%s;", + wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%s;", + wsp->ws_lvl, n, p, p->flags, wsnode_flagstr (p->flags), p->v.word); else - wsp->ws_debug ("%4d: %p: %#04x (%s):%.*s;", + wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%.*s;", + wsp->ws_lvl, n, p, p->flags, wsnode_flagstr (p->flags), @@ -435,2 +579,5 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node) + if (!(node->flags & _WSNF_JOIN)) + return 0; + for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next) @@ -459,2 +606,3 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node) { + node->flags |= p->flags & _WSNF_QUOTE; wsnode_remove (wsp, p); @@ -478,2 +626,6 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node) +static void wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote, + char *dst, const char *src, + size_t n); + static int @@ -482,5 +634,2 @@ wsnode_quoteremoval (struct wordsplit *wsp) struct wordsplit_node *p; - void (*uqfn) (char *, const char *, size_t) = - (wsp->ws_flags & WRDSF_CESCAPES) ? - wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy; @@ -493,5 +642,3 @@ wsnode_quoteremoval (struct wordsplit *wsp) if (wsp->ws_flags & WRDSF_QUOTE) - { - unquote = !(p->flags & _WSNF_NOEXPAND); - } + unquote = !(p->flags & _WSNF_NOEXPAND); else @@ -512,7 +659,4 @@ wsnode_quoteremoval (struct wordsplit *wsp) - if (wsp->ws_flags & WRDSF_ESCAPE) - wordsplit_general_unquote_copy (p->v.word, str, slen, - wsp->ws_escape); - else - uqfn (p->v.word, str, slen); + wordsplit_string_unquote_copy (wsp, p->flags & _WSNF_QUOTE, + p->v.word, str, slen); } @@ -537,2 +681,21 @@ wsnode_coalesce (struct wordsplit *wsp) static int +wsnode_tail_coalesce (struct wordsplit *wsp, struct wordsplit_node *p) +{ + if (p->next) + { + struct wordsplit_node *np = p; + while (np && np->next) + { + np->flags |= _WSNF_JOIN; + np = np->next; + } + if (coalesce_segment (wsp, p)) + return 1; + } + return 0; +} + +static size_t skip_delim (struct wordsplit *wsp); + +static int wordsplit_finish (struct wordsplit *wsp) @@ -541,15 +704,133 @@ wordsplit_finish (struct wordsplit *wsp) size_t n; + int delim; - n = 0; + /* Postprocess delimiters. It would be rather simple, if it weren't for + the incremental operation. - for (p = wsp->ws_head; p; p = p->next) - n++; + Nodes of type _WSNF_DELIM get inserted to the node list if either + WRDSF_RETURN_DELIMS flag or WRDSO_MAXWORDS option is set. + + The following cases should be distinguished: + + 1. If both WRDSF_SQUEEZE_DELIMS and WRDSF_RETURN_DELIMS are set, compress + any runs of similar delimiter nodes to a single node. The nodes are + 'similar' if they point to the same delimiter character. + + If WRDSO_MAXWORDS option is set, stop compressing when + ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into + a single last node. + + 2. If WRDSO_MAXWORDS option is set, but WRDSF_RETURN_DELIMS is not, + remove any delimiter nodes. Stop operation when + ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into + a single last node. + + 3. If incremental operation is in progress, restart the loop any time + a delimiter node is about to be returned, unless WRDSF_RETURN_DELIMS + is set. + */ + again: + delim = 0; /* Delimiter being processed (if any) */ + n = 0; /* Number of words processed so far */ + p = wsp->ws_head; /* Current node */ + + while (p) + { + struct wordsplit_node *next = p->next; + if (p->flags & _WSNF_DELIM) + { + if (wsp->ws_flags & WRDSF_RETURN_DELIMS) + { + if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS) + { + char const *s = wsnode_ptr (wsp, p); + if (delim) + { + if (delim == *s) + { + wsnode_remove (wsp, p); + p = next; + continue; + } + else + { + delim = 0; + n++; /* Count this node; it will be returned */ + } + } + else + { + delim = *s; + p = next; + continue; + } + } + } + else if (wsp->ws_options & WRDSO_MAXWORDS) + { + wsnode_remove (wsp, p); + p = next; + continue; + } + } + else + { + if (delim) + { + /* Last node was a delimiter or a compressed run of delimiters; + Count it, and clear the delimiter marker */ + n++; + delim = 0; + } + if (wsp->ws_options & WRDSO_MAXWORDS) + { + if (wsp->ws_wordi + n + 1 == wsp->ws_maxwords) + break; + } + } + n++; + if (wsp->ws_flags & WRDSF_INCREMENTAL) + p = NULL; /* Break the loop */ + else + p = next; + } + + if (p) + { + /* We're here if WRDSO_MAXWORDS is in effect and wsp->ws_maxwords + words have already been collected. Reconstruct a single final + node from the remaining nodes. */ + if (wsnode_tail_coalesce (wsp, p)) + return wsp->ws_errno; + n++; + } + + if (n == 0 && (wsp->ws_flags & WRDSF_INCREMENTAL)) + { + /* The loop above have eliminated all nodes. Restart the + processing, if there's any input left. */ + if (wsp->ws_endp < wsp->ws_len) + { + int rc; + if (wsp->ws_flags & WRDSF_SHOWDBG) + wsp->ws_debug (_("Restarting")); + rc = wordsplit_process_list (wsp, skip_delim (wsp)); + if (rc) + return rc; + } + else + { + wsp->ws_error = WRDSE_EOF; + return WRDSE_EOF; + } + goto again; + } if (alloc_space (wsp, n + 1)) - return 1; + return wsp->ws_errno; - for (p = wsp->ws_head; p; p = p->next) + while (wsp->ws_head) { - const char *str = wsnode_ptr (wsp, p); - size_t slen = wsnode_len (p); + const char *str = wsnode_ptr (wsp, wsp->ws_head); + size_t slen = wsnode_len (wsp->ws_head); char *newstr = malloc (slen + 1); @@ -565,4 +846,9 @@ wordsplit_finish (struct wordsplit *wsp) + wsnode_remove (wsp, wsp->ws_head); + wsp->ws_wordc++; + wsp->ws_wordi++; + if (wsp->ws_flags & WRDSF_INCREMENTAL) + break; } @@ -572,3 +858,31 @@ wordsplit_finish (struct wordsplit *wsp) +int +wordsplit_append (wordsplit_t *wsp, int argc, char **argv) +{ + int rc; + size_t i; + rc = alloc_space (wsp, wsp->ws_wordc + argc + 1); + if (rc) + return rc; + for (i = 0; i < argc; i++) + { + char *newstr = strdup (argv[i]); + if (!newstr) + { + while (i > 0) + { + free (wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i - 1]); + wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i - 1] = NULL; + i--; + } + return _wsplt_nomem (wsp); + } + wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i] = newstr; + } + wsp->ws_wordc += i; + wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL; + return 0; +} + /* Variable expansion */ @@ -609,6 +923,6 @@ node_split_prefix (struct wordsplit *wsp, static int -find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff) +find_closing_paren (const char *str, size_t i, size_t len, size_t *poff, + char const *paren) { - enum - { st_init, st_squote, st_dquote } state = st_init; + enum { st_init, st_squote, st_dquote } state = st_init; size_t level = 1; @@ -622,14 +936,19 @@ find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff) { - case '{': - level++; - break; - - case '}': - if (--level == 0) + default: + if (str[i] == paren[0]) + { + level++; + break; + } + else if (str[i] == paren[1]) { - *poff = i; - return 0; + if (--level == 0) + { + *poff = i; + return 0; + } + break; } break; - + case '"': @@ -660,4 +979,5 @@ find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff) -static const char * -wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len) +static int +wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len, + char const **ret) { @@ -666,3 +986,3 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len) if (!(wsp->ws_flags & WRDSF_ENV)) - return NULL; + return WRDSE_UNDEF; @@ -675,3 +995,6 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len) if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0) - return wsp->ws_env[i + 1]; + { + *ret = wsp->ws_env[i + 1]; + return WRDSE_OK; + } /* Skip the value. Break the loop if it is NULL. */ @@ -682,3 +1005,3 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len) } - else + else if (wsp->ws_env) { @@ -694,6 +1017,113 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len) if (j == len && var[j] == '=') - return var + j + 1; + { + *ret = var + j + 1; + return WRDSE_OK; + } } } - return NULL; + return WRDSE_UNDEF; +} + +static int +wsplt_assign_var (struct wordsplit *wsp, const char *name, size_t namelen, + char *value) +{ + int n = (wsp->ws_flags & WRDSF_ENV_KV) ? 2 : 1; + char *v; + + if (wsp->ws_envidx + n >= wsp->ws_envsiz) + { + size_t sz; + char **newenv; + + if (!wsp->ws_envbuf) + { + if (wsp->ws_flags & WRDSF_ENV) + { + size_t i = 0, j; + + if (wsp->ws_env) + { + for (; wsp->ws_env[i]; i++) + ; + } + + sz = i + n + 1; + + newenv = calloc (sz, sizeof(newenv[0])); + if (!newenv) + return _wsplt_nomem (wsp); + + for (j = 0; j < i; j++) + { + newenv[j] = strdup (wsp->ws_env[j]); + if (!newenv[j]) + { + for (; j > 1; j--) + free (newenv[j-1]); + free (newenv[j-1]); + return _wsplt_nomem (wsp); + } + } + newenv[j] = NULL; + + wsp->ws_envbuf = newenv; + wsp->ws_envidx = i; + wsp->ws_envsiz = sz; + wsp->ws_env = (const char**) wsp->ws_envbuf; + } + else + { + newenv = calloc (WORDSPLIT_ENV_INIT, sizeof(newenv[0])); + if (!newenv) + return _wsplt_nomem (wsp); + wsp->ws_envbuf = newenv; + wsp->ws_envidx = 0; + wsp->ws_envsiz = WORDSPLIT_ENV_INIT; + wsp->ws_env = (const char**) wsp->ws_envbuf; + wsp->ws_flags |= WRDSF_ENV; + } + } + else + { + wsp->ws_envsiz *= 2; + newenv = realloc (wsp->ws_envbuf, + wsp->ws_envsiz * sizeof (wsp->ws_envbuf[0])); + if (!newenv) + return _wsplt_nomem (wsp); + wsp->ws_envbuf = newenv; + wsp->ws_env = (const char**) wsp->ws_envbuf; + } + } + + if (wsp->ws_flags & WRDSF_ENV_KV) + { + /* A key-value pair environment */ + char *p = malloc (namelen + 1); + if (!p) + return _wsplt_nomem (wsp); + memcpy (p, name, namelen); + p[namelen] = 0; + + v = strdup (value); + if (!v) + { + free (p); + return _wsplt_nomem (wsp); + } + wsp->ws_env[wsp->ws_envidx++] = p; + wsp->ws_env[wsp->ws_envidx++] = v; + } + else + { + v = malloc (namelen + strlen(value) + 2); + if (!v) + return _wsplt_nomem (wsp); + memcpy (v, name, namelen); + v[namelen++] = '='; + strcpy(v + namelen, value); + wsp->ws_env[wsp->ws_envidx++] = v; + } + wsp->ws_env[wsp->ws_envidx++] = NULL; + return WRDSE_OK; } @@ -706,3 +1136,3 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, const char *defstr = NULL; - const char *value; + char *value; const char *vptr; @@ -710,7 +1140,9 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, const char *start = str - 1; - - if (ISALPHA (str[0]) || str[0] == '_') + int rc; + struct wordsplit ws; + + if (ISVARBEG (str[0])) { for (i = 1; i < len; i++) - if (!(ISALNUM (str[i]) || str[i] == '_')) + if (!ISVARCHR (str[i])) break; @@ -723,26 +1155,32 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, for (i = 1; i < len; i++) - if (str[i] == '}' || str[i] == ':') - break; - if (str[i] == ':') { - size_t j; - - defstr = str + i + 1; - if (find_closing_cbrace (str, i + 1, len, &j)) + if (str[i] == ':') { - wsp->ws_errno = WRDSE_CBRACE; - return 1; + size_t j; + + defstr = str + i + 1; + if (find_closing_paren (str, i + 1, len, &j, "{}")) + return _wsplt_seterr (wsp, WRDSE_CBRACE); + *pend = str + j; + break; + } + else if (str[i] == '}') + { + defstr = NULL; + *pend = str + i; + break; + } + else if (strchr ("-+?=", str[i])) + { + size_t j; + + defstr = str + i; + if (find_closing_paren (str, i, len, &j, "{}")) + return _wsplt_seterr (wsp, WRDSE_CBRACE); + *pend = str + j; + break; } - *pend = str + j; - } - else if (str[i] == '}') - { - defstr = NULL; - *pend = str + i; - } - else - { - wsp->ws_errno = WRDSE_CBRACE; - return 1; } + if (i == len) + return _wsplt_seterr (wsp, WRDSE_CBRACE); } @@ -770,28 +1208,140 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, - vptr = wordsplit_find_env (wsp, str, i); - if (vptr) + if (defstr && strchr("-+?=", defstr[0]) == 0) { - value = strdup (vptr); - if (!value) - return _wsplt_nomem (wsp); + rc = WRDSE_UNDEF; + defstr = NULL; } - else if (wsp->ws_flags & WRDSF_GETVAR) - value = wsp->ws_getvar (str, i, wsp->ws_closure); - else if (wsp->ws_flags & WRDSF_UNDEF) + else { - wsp->ws_errno = WRDSE_UNDEF; - if (wsp->ws_flags & WRDSF_SHOWERR) - wordsplit_perror (wsp); - return 1; + rc = wordsplit_find_env (wsp, str, i, &vptr); + if (rc == WRDSE_OK) + { + if (vptr) + { + value = strdup (vptr); + if (!value) + rc = WRDSE_NOSPACE; + } + else + rc = WRDSE_UNDEF; + } + else if (wsp->ws_flags & WRDSF_GETVAR) + rc = wsp->ws_getvar (&value, str, i, wsp->ws_closure); + else + rc = WRDSE_UNDEF; + + if (rc == WRDSE_OK + && (!value || value[0] == 0) + && defstr && defstr[-1] == ':') + { + free (value); + rc = WRDSE_UNDEF; + } } - else + + switch (rc) { - if (wsp->ws_flags & WRDSF_WARNUNDEF) - wsp->ws_error (_("warning: undefined variable `%.*s'"), (int) i, str); - if (wsp->ws_flags & WRDSF_KEEPUNDEF) - value = NULL; + case WRDSE_OK: + if (defstr && *defstr == '+') + { + size_t size = *pend - ++defstr; + + rc = _wsplt_subsplit (wsp, &ws, defstr, size, + WRDSF_NOSPLIT | WRDSF_WS | WRDSF_QUOTE | + (wsp->ws_flags & + (WRDSF_NOVAR | WRDSF_NOCMD)), 1); + if (rc) + return rc; + free (value); + value = ws.ws_wordv[0]; + ws.ws_wordv[0] = NULL; + wordsplit_free (&ws); + } + break; + + case WRDSE_UNDEF: + if (defstr) + { + size_t size; + if (*defstr == '-' || *defstr == '=') + { + size = *pend - ++defstr; + + rc = _wsplt_subsplit (wsp, &ws, defstr, size, + WRDSF_NOSPLIT | WRDSF_WS | WRDSF_QUOTE | + (wsp->ws_flags & + (WRDSF_NOVAR | WRDSF_NOCMD)), + 1); + if (rc) + return rc; + + value = ws.ws_wordv[0]; + ws.ws_wordv[0] = NULL; + wordsplit_free (&ws); + + if (defstr[-1] == '=') + wsplt_assign_var (wsp, str, i, value); + } + else + { + if (*defstr == '?') + { + size = *pend - ++defstr; + if (size == 0) + wsp->ws_error (_("%.*s: variable null or not set"), + (int) i, str); + else + { + rc = _wsplt_subsplit (wsp, &ws, defstr, size, + WRDSF_NOSPLIT | WRDSF_WS | + WRDSF_QUOTE | + (wsp->ws_flags & + (WRDSF_NOVAR | WRDSF_NOCMD)), + 1); + if (rc == 0) + wsp->ws_error ("%.*s: %s", + (int) i, str, ws.ws_wordv[0]); + else + wsp->ws_error ("%.*s: %.*s", + (int) i, str, (int) size, defstr); + wordsplit_free (&ws); + } + } + value = NULL; + } + } + else if (wsp->ws_flags & WRDSF_UNDEF) + { + _wsplt_seterr (wsp, WRDSE_UNDEF); + return 1; + } else - value = ""; + { + if (wsp->ws_flags & WRDSF_WARNUNDEF) + wsp->ws_error (_("warning: undefined variable `%.*s'"), + (int) i, str); + if (wsp->ws_flags & WRDSF_KEEPUNDEF) + value = NULL; + else + { + value = strdup (""); + if (!value) + return _wsplt_nomem (wsp); + } + } + break; + + case WRDSE_NOSPACE: + return _wsplt_nomem (wsp); + + case WRDSE_USERERR: + if (wsp->ws_errno == WRDSE_USERERR) + free (wsp->ws_usererr); + wsp->ws_usererr = value; + /* fall through */ + default: + _wsplt_seterr (wsp, rc); + return 1; } - /* FIXME: handle defstr */ + if (value) @@ -805,5 +1355,3 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg; - newnode->v.word = strdup (value); - if (!newnode->v.word) - return _wsplt_nomem (wsp); + newnode->v.word = value; } @@ -811,2 +1359,3 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, { + free (value); /* Empty string is a special case */ @@ -821,8 +1370,13 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, struct wordsplit ws; - int i; - - ws.ws_delim = wsp->ws_delim; - if (wordsplit (value, &ws, - WRDSF_NOVAR | WRDSF_NOCMD | WRDSF_DELIM | WRDSF_WS)) + int rc; + + rc = _wsplt_subsplit (wsp, &ws, value, strlen (value), + WRDSF_NOVAR | WRDSF_NOCMD | + WRDSF_QUOTE + | (WSP_RETURN_DELIMS (wsp) ? WRDSF_RETURN_DELIMS : 0) , + 0); + free (value); + if (rc) { + _wsplt_seterr_sub (wsp, &ws); wordsplit_free (&ws); @@ -830,15 +1384,5 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, } - for (i = 0; i < ws.ws_wordc; i++) - { - if (wsnode_new (wsp, &newnode)) - return 1; - wsnode_insert (wsp, newnode, *ptail, 0); - *ptail = newnode; - newnode->flags = _WSNF_WORD | - _WSNF_NOEXPAND | - (i + 1 < ws.ws_wordc ? (flg & ~_WSNF_JOIN) : flg); - newnode->v.word = strdup (ws.ws_wordv[i]); - if (!newnode->v.word) - return _wsplt_nomem (wsp); - } + wsnode_insert (wsp, ws.ws_head, *ptail, 0); + *ptail = ws.ws_tail; + ws.ws_head = ws.ws_tail = NULL; wordsplit_free (&ws); @@ -873,3 +1417,15 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, static int -node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node) +begin_var_p (int c) +{ + return c == '{' || ISVARBEG (c); +} + +static int +node_expand (struct wordsplit *wsp, struct wordsplit_node *node, + int (*beg_p) (int), + int (*ws_exp_fn) (struct wordsplit *wsp, + const char *str, size_t len, + struct wordsplit_node **ptail, + const char **pend, + int flg)) { @@ -889,3 +1445,3 @@ node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node) } - if (*p == '$') + if (*p == '$' && beg_p (p[1])) { @@ -898,4 +1454,4 @@ node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node) p++; - if (expvar (wsp, p, slen - n, &tail, &p, - node->flags & (_WSNF_JOIN | _WSNF_QUOTE))) + if (ws_exp_fn (wsp, p, slen - n, &tail, &p, + node->flags & (_WSNF_JOIN | _WSNF_QUOTE))) return 1; @@ -910,3 +1466,3 @@ node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node) if (node_split_prefix (wsp, &tail, node, off, p - str, - node->flags & _WSNF_JOIN)) + node->flags & (_WSNF_JOIN|_WSNF_QUOTE))) return 1; @@ -920,4 +1476,4 @@ node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node) } - -/* Remove NULL lists */ + +/* Remove NULL nodes from the list */ static void @@ -930,2 +1486,4 @@ wsnode_nullelim (struct wordsplit *wsp) struct wordsplit_node *next = p->next; + if (p->flags & _WSNF_DELIM && p->prev) + p->prev->flags &= ~_WSNF_JOIN; if (p->flags & _WSNF_NULL) @@ -947,4 +1505,127 @@ wordsplit_varexp (struct wordsplit *wsp) struct wordsplit_node *next = p->next; + if (!(p->flags & (_WSNF_NOEXPAND|_WSNF_DELIM))) + if (node_expand (wsp, p, begin_var_p, expvar)) + return 1; + p = next; + } + + wsnode_nullelim (wsp); + return 0; +} + +static int +begin_cmd_p (int c) +{ + return c == '('; +} + +static int +expcmd (struct wordsplit *wsp, const char *str, size_t len, + struct wordsplit_node **ptail, const char **pend, int flg) +{ + int rc; + size_t j; + char *value; + struct wordsplit_node *newnode; + struct wordsplit ws; + + str++; + len--; + + if (find_closing_paren (str, 0, len, &j, "()")) + { + _wsplt_seterr (wsp, WRDSE_PAREN); + return 1; + } + + *pend = str + j; + rc = _wsplt_subsplit (wsp, &ws, str, j, WRDSF_WS | WRDSF_QUOTE, 1); + if (rc) + { + _wsplt_seterr_sub (wsp, &ws); + wordsplit_free (&ws); + return 1; + } + rc = wsp->ws_command (&value, str, j, ws.ws_wordv, wsp->ws_closure); + wordsplit_free (&ws); + + if (rc == WRDSE_NOSPACE) + return _wsplt_nomem (wsp); + else if (rc) + { + if (rc == WRDSE_USERERR) + { + if (wsp->ws_errno == WRDSE_USERERR) + free (wsp->ws_usererr); + wsp->ws_usererr = value; + } + _wsplt_seterr (wsp, rc); + return 1; + } + + if (value) + { + if (flg & _WSNF_QUOTE) + { + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg; + newnode->v.word = value; + } + else if (*value == 0) + { + free (value); + /* Empty string is a special case */ + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_NULL; + } + else + { + struct wordsplit ws; + int rc; + + rc = _wsplt_subsplit (wsp, &ws, value, strlen (value), + WRDSF_NOVAR | WRDSF_NOCMD + | WRDSF_WS | WRDSF_QUOTE + | (WSP_RETURN_DELIMS (wsp) ? WRDSF_RETURN_DELIMS : 0), + 0); + free (value); + if (rc) + { + _wsplt_seterr_sub (wsp, &ws); + wordsplit_free (&ws); + return 1; + } + wsnode_insert (wsp, ws.ws_head, *ptail, 0); + *ptail = ws.ws_tail; + ws.ws_head = ws.ws_tail = NULL; + wordsplit_free (&ws); + } + } + else + { + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_NULL; + } + return 0; +} + +static int +wordsplit_cmdexp (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + + for (p = wsp->ws_head; p;) + { + struct wordsplit_node *next = p->next; if (!(p->flags & _WSNF_NOEXPAND)) - if (node_expand_vars (wsp, p)) + if (node_expand (wsp, p, begin_cmd_p, expcmd)) return 1; @@ -960,3 +1641,3 @@ wordsplit_varexp (struct wordsplit *wsp) node in the list is a text reference node. */ -static void +static int wordsplit_trimws (struct wordsplit *wsp) @@ -969,10 +1650,17 @@ wordsplit_trimws (struct wordsplit *wsp) + if (!(p->flags & _WSNF_QUOTE)) + { + /* Skip leading whitespace: */ + for (n = p->v.segm.beg; n < p->v.segm.end && ISWS (wsp->ws_input[n]); + n++) + ; + p->v.segm.beg = n; + } + + while (p->next && (p->flags & _WSNF_JOIN)) + p = p->next; + |