aboutsummaryrefslogtreecommitdiff
path: root/src/wordsplit.c
diff options
context:
space:
mode:
author Sergey Poznyakoff <gray@gnu.org> 2018-12-08 12:03:40 +0200
committer Sergey Poznyakoff <gray@gnu.org> 2018-12-08 12:03:40 +0200
commit 602f4d93070ac0e762e0cbe3ef72ba792f9c4811 (patch)
tree 8047168c6b23de38973b494b6ec794a81faf3576 /src/wordsplit.c
parent 2a684f1cdd7723c2ded277ea2c7e66227b6f3ae1 (diff)
download vmod-dbrw-602f4d93070ac0e762e0cbe3ef72ba792f9c4811.tar.gz
vmod-dbrw-602f4d93070ac0e762e0cbe3ef72ba792f9c4811.tar.bz2
Implement the $(urlprefixes) built-in function.
* NEWS: Update.
* README: Update.
* configure.ac: Version 2.2.91
* doc/vmod-dbrw.3: Document the use of $(urlprefixes) built-in
* doc/vmod-dbrw.texi: Likewise.
* src/vmod_dbrw.c (parse_flags): Make sure status string is null-terminated.
(do_rewrite): Expand built-in functions in $(). Support urlprefixes.
On debug_level=100, produce detailed trace of expansions.
* src/wordsplit.c: Pull from grecs commit 9097d529.
* src/wordsplit.h: Likewise.
* tests/initdb.at (rewrite): Change the url column.
* tests/rewrite01.at: Use $(urlprefixes) in the SQL templates.
* tests/rewrite02.at: Likewise.
* tests/rewrite03.at: Likewise.
* tests/rewrite04.at: Likewise.
* tests/rewrite05.at: Likewise.
* tests/rewrite06.at: Likewise.
Diffstat (limited to 'src/wordsplit.c')
-rw-r--r-- src/wordsplit.c 1646
1 files changed, 1284 insertions, 362 deletions
diff --git a/src/wordsplit.c b/src/wordsplit.c
index f4740bf..bad59b1 100644
--- a/src/wordsplit.c
+++ b/src/wordsplit.c
@@ -1,5 +1,5 @@
/* wordsplit - a word splitter
- Copyright (C) 2009-2014 Sergey Poznyakoff
+ Copyright (C) 2009-2018 Sergey Poznyakoff
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@@ -25,6 +25,8 @@
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
+#include <pwd.h>
+#include <glob.h>
#if ENABLE_NLS
# include <gettext.h>
@@ -48,17 +50,23 @@
#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c))
#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127)
+/* True if C may begin a variable name: an ISALPHA character or an
+   underscore.  The argument is evaluated more than once, so it must not
+   have side effects.  */
+#define ISVARBEG(c) (ISALPHA(c) || (c) == '_')
+/* True if C may continue a variable name: an ISALNUM character or an
+   underscore.  The argument is evaluated more than once.  */
+#define ISVARCHR(c) (ISALNUM(c) || (c) == '_')
+
+/* True if either WRDSF_RETURN_DELIMS is set in WSP->ws_flags or
+   WRDSO_MAXWORDS is set in WSP->ws_options.  */
+#define WSP_RETURN_DELIMS(wsp) \
+ ((wsp)->ws_flags & WRDSF_RETURN_DELIMS || ((wsp)->ws_options & WRDSO_MAXWORDS))
+
#define ALLOC_INIT 128
#define ALLOC_INCR 128
static void
_wsplt_alloc_die (struct wordsplit *wsp)
{
- wsp->ws_error (_("memory exhausted"));
+ wsp->ws_error ("%s", _("memory exhausted"));
abort ();
}
-static void
+static void
_wsplt_error (const char *fmt, ...)
{
va_list ap;
@@ -72,6 +80,15 @@ _wsplt_error (const char *fmt, ...)
static void wordsplit_free_nodes (struct wordsplit *);
+/* Store the error code EC in WSP->ws_errno.  If WRDSF_SHOWERR is set in
+   WSP->ws_flags, report the error through wordsplit_perror.  Return EC.  */
static int
+_wsplt_seterr (struct wordsplit *wsp, int ec)
+{
+  int report = (wsp->ws_flags & WRDSF_SHOWERR) != 0;
+
+  wsp->ws_errno = ec;
+  if (report)
+    wordsplit_perror (wsp);
+  return ec;
+}
+
+static int
_wsplt_nomem (struct wordsplit *wsp)
{
errno = ENOMEM;
@@ -86,6 +103,84 @@ _wsplt_nomem (struct wordsplit *wsp)
return wsp->ws_errno;
}
+/* Forward declarations.  wordsplit_init, wordsplit_process_list and
+ wordsplit_finish are called by _wsplt_subsplit below, ahead of their
+ definitions.  wordsplit_run is not referenced in the nearby code;
+ presumably it is used further down the file. */
+static int wordsplit_run (const char *command, size_t length,
+ struct wordsplit *wsp,
+ int flags, int lvl);
+
+static int wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
+ int flags);
+static int wordsplit_process_list (struct wordsplit *wsp, size_t start);
+static int wordsplit_finish (struct wordsplit *wsp);
+
+/* Run a nested split of the LEN bytes at STR into WSS, inheriting settings
+ from the parent splitter WSP.
+
+ WSS always receives the delimiters, the debug, error and alloc-die
+ callbacks and the ws_options of WSP. Unless FLAGS contains WRDSF_NOVAR,
+ the environment and ws_getvar are inherited as well; unless it contains
+ WRDSF_NOCMD, so is ws_command. ws_closure is inherited unless both of
+ these flags are set.
+
+ If FINALIZE is non-zero, wordsplit_finish is run on WSS and its node list
+ is freed afterwards. Otherwise the node list is left in WSS for the
+ caller to use.
+
+ Return 0 on success, or the non-zero code returned by the step that
+ failed (wordsplit_init, wordsplit_process_list or wordsplit_finish). */
+static int
+_wsplt_subsplit (struct wordsplit *wsp, struct wordsplit *wss,
+ char const *str, int len,
+ int flags, int finalize)
+{
+ int rc;
+
+ /* Fields that are always taken from the parent */
+ wss->ws_delim = wsp->ws_delim;
+ wss->ws_debug = wsp->ws_debug;
+ wss->ws_error = wsp->ws_error;
+ wss->ws_alloc_die = wsp->ws_alloc_die;
+
+ if (!(flags & WRDSF_NOVAR))
+ {
+ /* Variable expansion is enabled: share the parent's variable sources */
+ wss->ws_env = wsp->ws_env;
+ wss->ws_getvar = wsp->ws_getvar;
+ flags |= wsp->ws_flags & (WRDSF_ENV | WRDSF_ENV_KV | WRDSF_GETVAR);
+ }
+ if (!(flags & WRDSF_NOCMD))
+ {
+ wss->ws_command = wsp->ws_command;
+ }
+
+ /* The closure is passed on if at least one of variable or command
+ expansion is enabled */
+ if ((flags & (WRDSF_NOVAR|WRDSF_NOCMD)) != (WRDSF_NOVAR|WRDSF_NOCMD))
+ {
+ wss->ws_closure = wsp->ws_closure;
+ flags |= wsp->ws_flags & WRDSF_CLOSURE;
+ }
+
+ wss->ws_options = wsp->ws_options;
+
+ /* Tell wordsplit_init that the fields assigned above are set, and
+ inherit the parent's diagnostics and options flags */
+ flags |= WRDSF_DELIM
+ | WRDSF_ALLOC_DIE
+ | WRDSF_ERROR
+ | WRDSF_DEBUG
+ | (wsp->ws_flags & (WRDSF_SHOWDBG | WRDSF_SHOWERR | WRDSF_OPTIONS));
+
+ rc = wordsplit_init (wss, str, len, flags);
+ if (rc)
+ return rc;
+ /* The nested splitter runs one level deeper than its parent */
+ wss->ws_lvl = wsp->ws_lvl + 1;
+ rc = wordsplit_process_list (wss, 0);
+ if (rc)
+ {
+ wordsplit_free_nodes (wss);
+ return rc;
+ }
+ if (finalize)
+ {
+ rc = wordsplit_finish (wss);
+ wordsplit_free_nodes (wss);
+ }
+ return rc;
+}
+
+/* Propagate the error state of the nested splitter WSS to its parent WSP.
+   A user error message already held by WSP is freed first.  If WSS failed
+   with WRDSE_USERERR, its ws_usererr pointer is handed over to WSP, and WSS
+   is left with a NULL ws_usererr and ws_errno set to WRDSE_EOF, so that it
+   no longer refers to the transferred message.  */
+static void
+_wsplt_seterr_sub (struct wordsplit *wsp, struct wordsplit *wss)
+{
+  int sub_errno = wss->ws_errno;
+
+  if (wsp->ws_errno == WRDSE_USERERR)
+    free (wsp->ws_usererr);
+  wsp->ws_errno = sub_errno;
+
+  if (sub_errno != WRDSE_USERERR)
+    return;
+
+  /* Move the message from the child to the parent */
+  wsp->ws_usererr = wss->ws_usererr;
+  wss->ws_errno = WRDSE_EOF;
+  wss->ws_usererr = NULL;
+}
+
static void
wordsplit_init0 (struct wordsplit *wsp)
{
@@ -93,6 +188,7 @@ wordsplit_init0 (struct wordsplit *wsp)
{
if (!(wsp->ws_flags & WRDSF_APPEND))
wordsplit_free_words (wsp);
+ wordsplit_clearerr (wsp);
}
else
{
@@ -102,9 +198,10 @@ wordsplit_init0 (struct wordsplit *wsp)
}
wsp->ws_errno = 0;
- wsp->ws_head = wsp->ws_tail = NULL;
}
+/* Translation table for C-style escapes, laid out as consecutive character
+ pairs. Judging by the contents, the first character of a pair is the
+ one written after the backslash and the second is the character it
+ stands for (e.g. 'n' and newline). wordsplit_init installs it as both
+ ws_escape[WRDSX_WORD] and ws_escape[WRDSX_QUOTE] when WRDSF_CESCAPES is
+ set and WRDSF_ESCAPE is not. */
+char wordsplit_c_escape_tab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";
+
static int
wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
int flags)
@@ -116,23 +213,21 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
if (!(wsp->ws_flags & WRDSF_ERROR))
wsp->ws_error = _wsplt_error;
- if (!(wsp->ws_flags & WRDSF_NOVAR)
- && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR)))
+ if (!(wsp->ws_flags & WRDSF_NOVAR))
{
- errno = EINVAL;
- wsp->ws_errno = WRDSE_USAGE;
- if (wsp->ws_flags & WRDSF_SHOWERR)
- wordsplit_perror (wsp);
- return wsp->ws_errno;
+ /* These will be initialized on first variable assignment */
+ wsp->ws_envidx = wsp->ws_envsiz = 0;
+ wsp->ws_envbuf = NULL;
}
if (!(wsp->ws_flags & WRDSF_NOCMD))
{
- errno = EINVAL;
- wsp->ws_errno = WRDSE_NOSUPP;
- if (wsp->ws_flags & WRDSF_SHOWERR)
- wordsplit_perror (wsp);
- return wsp->ws_errno;
+ if (!wsp->ws_command)
+ {
+ _wsplt_seterr (wsp, WRDSE_USAGE);
+ errno = EINVAL;
+ return wsp->ws_errno;
+ }
}
if (wsp->ws_flags & WRDSF_SHOWDBG)
@@ -163,10 +258,42 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
if (!(wsp->ws_flags & WRDSF_CLOSURE))
wsp->ws_closure = NULL;
+ if (!(wsp->ws_flags & WRDSF_OPTIONS))
+ wsp->ws_options = 0;
+
+ if (wsp->ws_flags & WRDSF_ESCAPE)
+ {
+ if (!wsp->ws_escape[WRDSX_WORD])
+ wsp->ws_escape[WRDSX_WORD] = "";
+ if (!wsp->ws_escape[WRDSX_QUOTE])
+ wsp->ws_escape[WRDSX_QUOTE] = "";
+ }
+ else
+ {
+ if (wsp->ws_flags & WRDSF_CESCAPES)
+ {
+ wsp->ws_escape[WRDSX_WORD] = wordsplit_c_escape_tab;
+ wsp->ws_escape[WRDSX_QUOTE] = wordsplit_c_escape_tab;
+ wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD
+ | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD;
+ }
+ else
+ {
+ wsp->ws_escape[WRDSX_WORD] = "";
+ wsp->ws_escape[WRDSX_QUOTE] = "\\\\\"\"";
+ wsp->ws_options |= WRDSO_BSKEEP_QUOTE;
+ }
+ }
+
wsp->ws_endp = 0;
+ wsp->ws_wordi = 0;
+ if (wsp->ws_flags & WRDSF_REUSE)
+ wordsplit_free_nodes (wsp);
+ wsp->ws_head = wsp->ws_tail = NULL;
+
wordsplit_init0 (wsp);
-
+
return 0;
}
@@ -209,6 +336,7 @@ alloc_space (struct wordsplit *wsp, size_t count)
#define _WSNF_NOEXPAND 0x08 /* text is not subject to expansion */
#define _WSNF_JOIN 0x10 /* node must be joined with the next node */
#define _WSNF_SEXP 0x20 /* is a sed expression */
+#define _WSNF_DELIM 0x40 /* node is a delimiter */
#define _WSNF_EMPTYOK 0x0100 /* special flag indicating that
wordsplit_add_segm must add the
@@ -233,7 +361,7 @@ struct wordsplit_node
static const char *
wsnode_flagstr (int flags)
{
- static char retbuf[6];
+ static char retbuf[7];
char *p = retbuf;
if (flags & _WSNF_WORD)
@@ -258,6 +386,10 @@ wsnode_flagstr (int flags)
*p++ = 's';
else
*p++ = '-';
+ if (flags & _WSNF_DELIM)
+ *p++ = 'd';
+ else
+ *p++ = '-';
*p = 0;
return retbuf;
}
@@ -338,6 +470,14 @@ wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node)
node->next = node->prev = NULL;
}
+/* Return the last node of the list that begins at P, following the `next'
+   links.  Return NULL if P is NULL.  */
+static struct wordsplit_node *
+wsnode_tail (struct wordsplit_node *p)
+{
+  struct wordsplit_node *last;
+
+  if (!p)
+    return p;
+  for (last = p; last->next; last = last->next)
+    ;
+  return last;
+}
+
static void
wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node,
struct wordsplit_node *anchor, int before)
@@ -353,22 +493,24 @@ wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node,
wsnode_insert (wsp, node, anchor->prev, 0);
else
{
+ struct wordsplit_node *tail = wsnode_tail (node);
node->prev = NULL;
- node->next = anchor;
- anchor->prev = node;
+ tail->next = anchor;
+ anchor->prev = tail;
wsp->ws_head = node;
}
}
else
{
struct wordsplit_node *p;
+ struct wordsplit_node *tail = wsnode_tail (node);
p = anchor->next;
if (p)
- p->prev = node;
+ p->prev = tail;
else
- wsp->ws_tail = node;
- node->next = p;
+ wsp->ws_tail = tail;
+ tail->next = p;
node->prev = anchor;
anchor->next = node;
}
@@ -415,10 +557,12 @@ wordsplit_dump_nodes (struct wordsplit *wsp)
for (p = wsp->ws_head, n = 0; p; p = p->next, n++)
{
if (p->flags & _WSNF_WORD)
- wsp->ws_debug ("%4d: %p: %#04x (%s):%s;",
+ wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%s;",
+ wsp->ws_lvl,
n, p, p->flags, wsnode_flagstr (p->flags), p->v.word);
else
- wsp->ws_debug ("%4d: %p: %#04x (%s):%.*s;",
+ wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%.*s;",
+ wsp->ws_lvl,
n, p, p->flags, wsnode_flagstr (p->flags),
(int) (p->v.segm.end - p->v.segm.beg),
wsp->ws_input + p->v.segm.beg);
@@ -433,6 +577,9 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
char *buf, *cur;
int stop;
+ if (!(node->flags & _WSNF_JOIN))
+ return 0;
+
for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next)
{
len += wsnode_len (p);
@@ -457,6 +604,7 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
cur += slen;
if (p != node)
{
+ node->flags |= p->flags & _WSNF_QUOTE;
wsnode_remove (wsp, p);
stop = p == end;
wsnode_free (p);
@@ -476,13 +624,14 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
return 0;
}
+/* Forward declaration: called by wsnode_quoteremoval below, ahead of its
+ definition. */
+static void wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
+ char *dst, const char *src,
+ size_t n);
+
static int
wsnode_quoteremoval (struct wordsplit *wsp)
{
struct wordsplit_node *p;
- void (*uqfn) (char *, const char *, size_t) =
- (wsp->ws_flags & WRDSF_CESCAPES) ?
- wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy;
for (p = wsp->ws_head; p; p = p->next)
{
@@ -491,9 +640,7 @@ wsnode_quoteremoval (struct wordsplit *wsp)
int unquote;
if (wsp->ws_flags & WRDSF_QUOTE)
- {
- unquote = !(p->flags & _WSNF_NOEXPAND);
- }
+ unquote = !(p->flags & _WSNF_NOEXPAND);
else
unquote = 0;
@@ -510,11 +657,8 @@ wsnode_quoteremoval (struct wordsplit *wsp)
p->flags |= _WSNF_WORD;
}
- if (wsp->ws_flags & WRDSF_ESCAPE)
- wordsplit_general_unquote_copy (p->v.word, str, slen,
- wsp->ws_escape);
- else
- uqfn (p->v.word, str, slen);
+ wordsplit_string_unquote_copy (wsp, p->flags & _WSNF_QUOTE,
+ p->v.word, str, slen);
}
}
return 0;
@@ -535,23 +679,160 @@ wsnode_coalesce (struct wordsplit *wsp)
}
+/* Join node P with all the nodes that follow it.  Every node from P up to,
+   but not including, the last one is flagged _WSNF_JOIN, and
+   coalesce_segment is then run on P.  Nothing is done if P has no
+   successor.  Return 0 on success and 1 if coalesce_segment fails.  */
static int
+wsnode_tail_coalesce (struct wordsplit *wsp, struct wordsplit_node *p)
+{
+  struct wordsplit_node *cur;
+
+  if (!p->next)
+    return 0;
+
+  for (cur = p; cur->next; cur = cur->next)
+    cur->flags |= _WSNF_JOIN;
+
+  return coalesce_segment (wsp, p) ? 1 : 0;
+}
+
+/* Forward declaration: wordsplit_finish passes its result to
+ wordsplit_process_list when it restarts incremental processing. */
+static size_t skip_delim (struct wordsplit *wsp);
+
+static int
wordsplit_finish (struct wordsplit *wsp)
{
struct wordsplit_node *p;
size_t n;
+ int delim;
- n = 0;
+ /* Postprocess delimiters. It would be rather simple, if it weren't for
+ the incremental operation.
- for (p = wsp->ws_head; p; p = p->next)
- n++;
+ Nodes of type _WSNF_DELIM get inserted to the node list if either
+ WRDSF_RETURN_DELIMS flag or WRDSO_MAXWORDS option is set.
+
+ The following cases should be distinguished:
+
+ 1. If both WRDSF_SQUEEZE_DELIMS and WRDSF_RETURN_DELIMS are set, compress
+ any runs of similar delimiter nodes to a single node. The nodes are
+ 'similar' if they point to the same delimiter character.
+
+ If WRDSO_MAXWORDS option is set, stop compressing when
+ ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into
+ a single last node.
+
+ 2. If WRDSO_MAXWORDS option is set, but WRDSF_RETURN_DELIMS is not,
+ remove any delimiter nodes. Stop operation when
+ ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into
+ a single last node.
+
+ 3. If incremental operation is in progress, restart the loop any time
+ a delimiter node is about to be returned, unless WRDSF_RETURN_DELIMS
+ is set.
+ */
+ again:
+ delim = 0; /* Delimiter being processed (if any) */
+ n = 0; /* Number of words processed so far */
+ p = wsp->ws_head; /* Current node */
+
+ while (p)
+ {
+ struct wordsplit_node *next = p->next;
+ if (p->flags & _WSNF_DELIM)
+ {
+ if (wsp->ws_flags & WRDSF_RETURN_DELIMS)
+ {
+ if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS)
+ {
+ char const *s = wsnode_ptr (wsp, p);
+ if (delim)
+ {
+ if (delim == *s)
+ {
+ wsnode_remove (wsp, p);
+ p = next;
+ continue;
+ }
+ else
+ {
+ delim = 0;
+ n++; /* Count this node; it will be returned */
+ }
+ }
+ else
+ {
+ delim = *s;
+ p = next;
+ continue;
+ }
+ }
+ }
+ else if (wsp->ws_options & WRDSO_MAXWORDS)
+ {
+ wsnode_remove (wsp, p);
+ p = next;
+ continue;
+ }
+ }
+ else
+ {
+ if (delim)
+ {
+ /* Last node was a delimiter or a compressed run of delimiters;
+ Count it, and clear the delimiter marker */
+ n++;
+ delim = 0;
+ }
+ if (wsp->ws_options & WRDSO_MAXWORDS)
+ {
+ if (wsp->ws_wordi + n + 1 == wsp->ws_maxwords)
+ break;
+ }
+ }
+ n++;
+ if (wsp->ws_flags & WRDSF_INCREMENTAL)
+ p = NULL; /* Break the loop */
+ else
+ p = next;
+ }
+
+ if (p)
+ {
+ /* We're here if WRDSO_MAXWORDS is in effect and wsp->ws_maxwords
+ words have already been collected. Reconstruct a single final
+ node from the remaining nodes. */
+ if (wsnode_tail_coalesce (wsp, p))
+ return wsp->ws_errno;
+ n++;
+ }
+
+ if (n == 0 && (wsp->ws_flags & WRDSF_INCREMENTAL))
+ {
+ /* The loop above has eliminated all nodes. Restart the
+ processing, if there's any input left. */
+ if (wsp->ws_endp < wsp->ws_len)
+ {
+ int rc;
+ if (wsp->ws_flags & WRDSF_SHOWDBG)
+ wsp->ws_debug (_("Restarting"));
+ rc = wordsplit_process_list (wsp, skip_delim (wsp));
+ if (rc)
+ return rc;
+ }
+ else
+ {
+ wsp->ws_error = WRDSE_EOF;
+ return WRDSE_EOF;
+ }
+ goto again;
+ }
if (alloc_space (wsp, n + 1))
- return 1;
+ return wsp->ws_errno;
- for (p = wsp->ws_head; p; p = p->next)
+ while (wsp->ws_head)
{
- const char *str = wsnode_ptr (wsp, p);
- size_t slen = wsnode_len (p);
+ const char *str = wsnode_ptr (wsp, wsp->ws_head);
+ size_t slen = wsnode_len (wsp->ws_head);
char *newstr = malloc (slen + 1);
/* Assign newstr first, even if it is NULL. This way
@@ -563,14 +844,47 @@ wordsplit_finish (struct wordsplit *wsp)
memcpy (newstr, str, slen);
newstr[slen] = 0;
+ wsnode_remove (wsp, wsp->ws_head);
+
wsp->ws_wordc++;
+ wsp->ws_wordi++;
+ if (wsp->ws_flags & WRDSF_INCREMENTAL)
+ break;
}
wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
return 0;
}
+/* Append copies of the ARGC strings in ARGV to the word array of WSP.
+   On success ws_wordc is increased by ARGC and the array is terminated
+   with a NULL pointer.
+
+   Return 0 on success.  If alloc_space fails, its return value is passed
+   on.  If a string cannot be duplicated, the copies made so far are freed,
+   their slots are reset to NULL, and the result of _wsplt_nomem is
+   returned.  */
+int
+wordsplit_append (wordsplit_t *wsp, int argc, char **argv)
+{
+  char **base;
+  size_t count;
+  int rc = alloc_space (wsp, wsp->ws_wordc + argc + 1);
+
+  if (rc)
+    return rc;
+
+  /* First free slot; taken after alloc_space, which may change ws_wordv */
+  base = wsp->ws_wordv + wsp->ws_offs + wsp->ws_wordc;
+
+  for (count = 0; count < argc; count++)
+    {
+      char *copy = strdup (argv[count]);
+
+      if (copy)
+        {
+          base[count] = copy;
+          continue;
+        }
+
+      /* Out of memory: undo what has been appended so far */
+      while (count > 0)
+        {
+          count--;
+          free (base[count]);
+          base[count] = NULL;
+        }
+      return _wsplt_nomem (wsp);
+    }
+
+  wsp->ws_wordc += count;
+  wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
+  return 0;
+}
+
/* Variable expansion */
static int
node_split_prefix (struct wordsplit *wsp,
@@ -607,10 +921,10 @@ node_split_prefix (struct wordsplit *wsp,
}
static int
-find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff)
+find_closing_paren (const char *str, size_t i, size_t len, size_t *poff,
+ char const *paren)
{
- enum
- { st_init, st_squote, st_dquote } state = st_init;
+ enum { st_init, st_squote, st_dquote } state = st_init;
size_t level = 1;
for (; i < len; i++)
@@ -620,18 +934,23 @@ find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff)
case st_init:
switch (str[i])
{
- case '{':
- level++;
- break;
-
- case '}':
- if (--level == 0)
+ default:
+ if (str[i] == paren[0])
+ {
+ level++;
+ break;
+ }
+ else if (str[i] == paren[1])
{
- *poff = i;
- return 0;
+ if (--level == 0)
+ {
+ *poff = i;
+ return 0;
+ }
+ break;
}
break;
-
+
case '"':
state = st_dquote;
break;
@@ -658,13 +977,14 @@ find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff)
return 1;
}
-static const char *
-wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len)
+static int
+wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len,
+ char const **ret)
{
size_t i;
if (!(wsp->ws_flags & WRDSF_ENV))
- return NULL;
+ return WRDSE_UNDEF;
if (wsp->ws_flags & WRDSF_ENV_KV)
{
@@ -673,14 +993,17 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len)
{
size_t elen = strlen (wsp->ws_env[i]);
if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0)
- return wsp->ws_env[i + 1];
+ {
+ *ret = wsp->ws_env[i + 1];
+ return WRDSE_OK;
+ }
/* Skip the value. Break the loop if it is NULL. */
i++;
if (wsp->ws_env[i] == NULL)
break;
}
}
- else
+ else if (wsp->ws_env)
{
/* Usual (A=B) environment. */
for (i = 0; wsp->ws_env[i]; i++)
@@ -692,10 +1015,117 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len)
if (name[j] != var[j])
break;
if (j == len && var[j] == '=')
- return var + j + 1;
+ {
+ *ret = var + j + 1;
+ return WRDSE_OK;
+ }
}
}
- return NULL;
+ return WRDSE_UNDEF;
+}
+
+/* Define the variable NAME (NAMELEN bytes, not necessarily null-terminated)
+   with the value VALUE in the environment of WSP.
+
+   The environment is kept in the private, heap-allocated vector
+   ws_envbuf, to which ws_env is made to point.  On the first assignment
+   the vector is created: if WRDSF_ENV is set, the entries of the existing
+   ws_env are duplicated into it; otherwise an empty vector of
+   WORDSPLIT_ENV_INIT slots is allocated and WRDSF_ENV is turned on.  Later
+   assignments double the vector when it runs out of room.
+
+   With WRDSF_ENV_KV the variable is stored as two consecutive entries (name,
+   value), otherwise as a single "NAME=VALUE" string.  ws_envidx always
+   indexes the terminating NULL entry, so that consecutive assignments form
+   one contiguous NULL-terminated vector.
+
+   Return WRDSE_OK on success, or the result of _wsplt_nomem if memory
+   cannot be allocated.  */
+static int
+wsplt_assign_var (struct wordsplit *wsp, const char *name, size_t namelen,
+                  char *value)
+{
+  int n = (wsp->ws_flags & WRDSF_ENV_KV) ? 2 : 1;
+  char *v;
+
+  /* Make sure there is room for N new entries and the terminating NULL */
+  if (wsp->ws_envidx + n >= wsp->ws_envsiz)
+    {
+      size_t sz;
+      char **newenv;
+
+      if (!wsp->ws_envbuf)
+        {
+          if (wsp->ws_flags & WRDSF_ENV)
+            {
+              size_t i = 0, j;
+
+              /* Count the entries of the caller-supplied environment */
+              if (wsp->ws_env)
+                {
+                  for (; wsp->ws_env[i]; i++)
+                    ;
+                }
+
+              sz = i + n + 1;
+
+              newenv = calloc (sz, sizeof(newenv[0]));
+              if (!newenv)
+                return _wsplt_nomem (wsp);
+
+              for (j = 0; j < i; j++)
+                {
+                  newenv[j] = strdup (wsp->ws_env[j]);
+                  if (!newenv[j])
+                    {
+                      /* Release the copies made so far (possibly none)
+                         and the vector itself */
+                      while (j > 0)
+                        free (newenv[--j]);
+                      free (newenv);
+                      return _wsplt_nomem (wsp);
+                    }
+                }
+              newenv[j] = NULL;
+
+              wsp->ws_envbuf = newenv;
+              wsp->ws_envidx = i;
+              wsp->ws_envsiz = sz;
+              wsp->ws_env = (const char**) wsp->ws_envbuf;
+            }
+          else
+            {
+              newenv = calloc (WORDSPLIT_ENV_INIT, sizeof(newenv[0]));
+              if (!newenv)
+                return _wsplt_nomem (wsp);
+              wsp->ws_envbuf = newenv;
+              wsp->ws_envidx = 0;
+              wsp->ws_envsiz = WORDSPLIT_ENV_INIT;
+              wsp->ws_env = (const char**) wsp->ws_envbuf;
+              wsp->ws_flags |= WRDSF_ENV;
+            }
+        }
+      else
+        {
+          /* Record the new size only after realloc has succeeded, so that
+             ws_envsiz never overstates the real size of the buffer */
+          sz = wsp->ws_envsiz * 2;
+          newenv = realloc (wsp->ws_envbuf,
+                            sz * sizeof (wsp->ws_envbuf[0]));
+          if (!newenv)
+            return _wsplt_nomem (wsp);
+          wsp->ws_envbuf = newenv;
+          wsp->ws_envsiz = sz;
+          wsp->ws_env = (const char**) wsp->ws_envbuf;
+        }
+    }
+
+  if (wsp->ws_flags & WRDSF_ENV_KV)
+    {
+      /* A key-value pair environment */
+      char *p = malloc (namelen + 1);
+      if (!p)
+        return _wsplt_nomem (wsp);
+      memcpy (p, name, namelen);
+      p[namelen] = 0;
+
+      v = strdup (value);
+      if (!v)
+        {
+          free (p);
+          return _wsplt_nomem (wsp);
+        }
+      wsp->ws_env[wsp->ws_envidx++] = p;
+      wsp->ws_env[wsp->ws_envidx++] = v;
+    }
+  else
+    {
+      v = malloc (namelen + strlen(value) + 2);
+      if (!v)
+        return _wsplt_nomem (wsp);
+      memcpy (v, name, namelen);
+      v[namelen++] = '=';
+      strcpy(v + namelen, value);
+      wsp->ws_env[wsp->ws_envidx++] = v;
+    }
+  /* Terminate the vector without stepping past the terminator: the next
+     assignment must overwrite this NULL, not land after it */
+  wsp->ws_env[wsp->ws_envidx] = NULL;
+  return WRDSE_OK;
}
static int
@@ -704,15 +1134,17 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
{
size_t i = 0;
const char *defstr = NULL;
- const char *value;
+ char *value;
const char *vptr;
struct wordsplit_node *newnode;
const char *start = str - 1;
-
- if (ISALPHA (str[0]) || str[0] == '_')
+ int rc;
+ struct wordsplit ws;
+
+ if (ISVARBEG (str[0]))
{
for (i = 1; i < len; i++)
- if (!(ISALNUM (str[i]) || str[i] == '_'))
+ if (!ISVARCHR (str[i]))
break;
*pend = str + i - 1;
}
@@ -721,30 +1153,36 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
str++;
len--;
for (i = 1; i < len; i++)
- if (str[i] == '}' || str[i] == ':')
- break;
- if (str[i] == ':')
{
- size_t j;
-
- defstr = str + i + 1;
- if (find_closing_cbrace (str, i + 1, len, &j))
+ if (str[i] == ':')
{
- wsp->ws_errno = WRDSE_CBRACE;
- return 1;
+ size_t j;
+
+ defstr = str + i + 1;
+ if (find_closing_paren (str, i + 1, len, &j, "{}"))
+ return _wsplt_seterr (wsp, WRDSE_CBRACE);
+ *pend = str + j;
+ break;
+ }
+ else if (str[i] == '}')
+ {
+ defstr = NULL;
+ *pend = str + i;
+ break;
+ }
+ else if (strchr ("-+?=", str[i]))
+ {
+ size_t j;
+
+ defstr = str + i;
+ if (find_closing_paren (str, i, len, &j, "{}"))
+ return _wsplt_seterr (wsp, WRDSE_CBRACE);
+ *pend = str + j;
+ break;
}
- *pend = str + j;
- }
- else if (str[i] == '}')
- {
- defstr = NULL;
- *pend = str + i;
- }
- else
- {
- wsp->ws_errno = WRDSE_CBRACE;
- return 1;
}
+ if (i == len)
+ return _wsplt_seterr (wsp, WRDSE_CBRACE);
}
else
{
@@ -768,32 +1206,144 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
i - its length
defstr - default replacement str */
- vptr = wordsplit_find_env (wsp, str, i);
- if (vptr)
+ if (defstr && strchr("-+?=", defstr[0]) == 0)
{
- value = strdup (vptr);
- if (!value)
- return _wsplt_nomem (wsp);
+ rc = WRDSE_UNDEF;
+ defstr = NULL;
}
- else if (wsp->ws_flags & WRDSF_GETVAR)
- value = wsp->ws_getvar (str, i, wsp->ws_closure);
- else if (wsp->ws_flags & WRDSF_UNDEF)
+ else
{
- wsp->ws_errno = WRDSE_UNDEF;
- if (wsp->ws_flags & WRDSF_SHOWERR)
- wordsplit_perror (wsp);
- return 1;
+ rc = wordsplit_find_env (wsp, str, i, &vptr);
+ if (rc == WRDSE_OK)
+ {
+ if (vptr)
+ {
+ value = strdup (vptr);
+ if (!value)
+ rc = WRDSE_NOSPACE;
+ }
+ else
+ rc = WRDSE_UNDEF;
+ }
+ else if (wsp->ws_flags & WRDSF_GETVAR)
+ rc = wsp->ws_getvar (&value, str, i, wsp->ws_closure);
+ else
+ rc = WRDSE_UNDEF;
+
+ if (rc == WRDSE_OK
+ && (!value || value[0] == 0)
+ && defstr && defstr[-1] == ':')
+ {
+ free (value);
+ rc = WRDSE_UNDEF;
+ }
}
- else
+
+ switch (rc)
{
- if (wsp->ws_flags & WRDSF_WARNUNDEF)
- wsp->ws_error (_("warning: undefined variable `%.*s'"), (int) i, str);
- if (wsp->ws_flags & WRDSF_KEEPUNDEF)
- value = NULL;
+ case WRDSE_OK:
+ if (defstr && *defstr == '+')
+ {
+ size_t size = *pend - ++defstr;
+
+ rc = _wsplt_subsplit (wsp, &ws, defstr, size,
+ WRDSF_NOSPLIT | WRDSF_WS | WRDSF_QUOTE |
+ (wsp->ws_flags &
+ (WRDSF_NOVAR | WRDSF_NOCMD)), 1);
+ if (rc)
+ return rc;
+ free (value);
+ value = ws.ws_wordv[0];
+ ws.ws_wordv[0] = NULL;
+ wordsplit_free (&ws);
+ }
+ break;
+
+ case WRDSE_UNDEF:
+ if (defstr)
+ {
+ size_t size;
+ if (*defstr == '-' || *defstr == '=')
+ {
+ size = *pend - ++defstr;
+
+ rc = _wsplt_subsplit (wsp, &ws, defstr, size,
+ WRDSF_NOSPLIT | WRDSF_WS | WRDSF_QUOTE |
+ (wsp->ws_flags &
+ (WRDSF_NOVAR | WRDSF_NOCMD)),
+ 1);
+ if (rc)
+ return rc;
+
+ value = ws.ws_wordv[0];
+ ws.ws_wordv[0] = NULL;
+ wordsplit_free (&ws);
+
+ if (defstr[-1] == '=')
+ wsplt_assign_var (wsp, str, i, value);
+ }
+ else
+ {
+ if (*defstr == '?')
+ {
+ size = *pend - ++defstr;
+ if (size == 0)
+ wsp->ws_error (_("%.*s: variable null or not set"),
+ (int) i, str);
+ else
+ {
+ rc = _wsplt_subsplit (wsp, &ws, defstr, size,
+ WRDSF_NOSPLIT | WRDSF_WS |
+ WRDSF_QUOTE |
+ (wsp->ws_flags &
+ (WRDSF_NOVAR | WRDSF_NOCMD)),
+ 1);
+ if (rc == 0)
+ wsp->ws_error ("%.*s: %s",
+ (int) i, str, ws.ws_wordv[0]);
+ else
+ wsp->ws_error ("%.*s: %.*s",
+ (int) i, str, (int) size, defstr);
+ wordsplit_free (&ws);
+ }
+ }
+ value = NULL;
+ }
+ }
+ else if (wsp->ws_flags & WRDSF_UNDEF)
+ {
+ _wsplt_seterr (wsp, WRDSE_UNDEF);
+ return 1;
+ }
else
- value = "";
+ {
+ if (wsp->ws_flags & WRDSF_WARNUNDEF)
+ wsp->ws_error (_("warning: undefined variable `%.*s'"),
+ (int) i, str);
+ if (wsp->ws_flags & WRDSF_KEEPUNDEF)
+ value = NULL;
+ else
+ {
+ value = strdup ("");
+ if (!value)
+ return _wsplt_nomem (wsp);
+ }
+ }
+ break;
+
+ case WRDSE_NOSPACE:
+ return _wsplt_nomem (wsp);
+
+ case WRDSE_USERERR:
+ if (wsp->ws_errno == WRDSE_USERERR)
+ free (wsp->ws_usererr);
+ wsp->ws_usererr = value;
+ /* fall through */
+ default:
+ _wsplt_seterr (wsp, rc);
+ return 1;
}
- /* FIXME: handle defstr */
+
if (value)
{
if (flg & _WSNF_QUOTE)
@@ -803,12 +1353,11 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
wsnode_insert (wsp, newnode, *ptail, 0);
*ptail = newnode;
newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
- newnode->v.word = strdup (value);
- if (!newnode->v.word)
- return _wsplt_nomem (wsp);
+ newnode->v.word = value;
}
else if (*value == 0)
{
+ free (value);
/* Empty string is a special case */
if (wsnode_new (wsp, &newnode))
return 1;
@@ -819,28 +1368,23 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
else
{
struct wordsplit ws;
- int i;
-
- ws.ws_delim = wsp->ws_delim;
- if (wordsplit (value, &ws,
- WRDSF_NOVAR | WRDSF_NOCMD | WRDSF_DELIM | WRDSF_WS))
+ int rc;
+
+ rc = _wsplt_subsplit (wsp, &ws, value, strlen (value),
+ WRDSF_NOVAR | WRDSF_NOCMD |
+ WRDSF_QUOTE
+ | (WSP_RETURN_DELIMS (wsp) ? WRDSF_RETURN_DELIMS : 0) ,
+ 0);
+ free (value);
+ if (rc)
{
+ _wsplt_seterr_sub (wsp, &ws);
wordsplit_free (&ws);
return 1;
}
- for (i = 0; i < ws.ws_wordc; i++)
- {
- if (wsnode_new (wsp, &newnode))
- return 1;
- wsnode_insert (wsp, newnode, *ptail, 0);
- *ptail = newnode;
- newnode->flags = _WSNF_WORD |
- _WSNF_NOEXPAND |
- (i + 1 < ws.ws_wordc ? (flg & ~_WSNF_JOIN) : flg);
- newnode->v.word = strdup (ws.ws_wordv[i]);
- if (!newnode->v.word)
- return _wsplt_nomem (wsp);
- }
+ wsnode_insert (wsp, ws.ws_head, *ptail, 0);
+ *ptail = ws.ws_tail;
+ ws.ws_head = ws.ws_tail = NULL;
wordsplit_free (&ws);
}
}
@@ -871,7 +1415,19 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
}
static int
-node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node)
+begin_var_p (int c)
+{
+  /* Non-zero if C, the character following a `$', starts a variable
+     reference: an opening curly brace or an ISVARBEG character.  */
+  if (c == '{')
+    return 1;
+  return ISVARBEG (c);
+}
+
+static int
+node_expand (struct wordsplit *wsp, struct wordsplit_node *node,
+ int (*beg_p) (int),
+ int (*ws_exp_fn) (struct wordsplit *wsp,
+ const char *str, size_t len,
+ struct wordsplit_node **ptail,
+ const char **pend,
+ int flg))
{
const char *str = wsnode_ptr (wsp, node);
size_t slen = wsnode_len (node);
@@ -887,7 +1443,7 @@ node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node)
p++;
continue;
}
- if (*p == '$')
+ if (*p == '$' && beg_p (p[1]))
{
size_t n = p - str;
@@ -896,8 +1452,8 @@ node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node)
if (node_split_prefix (wsp, &tail, node, off, n, _WSNF_JOIN))
return 1;
p++;
- if (expvar (wsp, p, slen - n, &tail, &p,
- node->flags & (_WSNF_JOIN | _WSNF_QUOTE)))
+ if (ws_exp_fn (wsp, p, slen - n, &tail, &p,
+ node->flags & (_WSNF_JOIN | _WSNF_QUOTE)))
return 1;
off += p - str + 1;
str = p + 1;
@@ -908,7 +1464,7 @@ node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node)
if (tail != node)
tail->flags |= _WSNF_JOIN;
if (node_split_prefix (wsp, &tail, node, off, p - str,
- node->flags & _WSNF_JOIN))
+ node->flags & (_WSNF_JOIN|_WSNF_QUOTE)))
return 1;
}
if (tail != node)
@@ -918,8 +1474,8 @@ node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node)
}
return 0;
}
-
-/* Remove NULL lists */
+
+/* Remove NULL nodes from the list */
static void
wsnode_nullelim (struct wordsplit *wsp)
{
@@ -928,6 +1484,8 @@ wsnode_nullelim (struct wordsplit *wsp)
for (p = wsp->ws_head; p;)
{
struct wordsplit_node *next = p->next;
+ if (p->flags & _WSNF_DELIM && p->prev)
+ p->prev->flags &= ~_WSNF_JOIN;
if (p->flags & _WSNF_NULL)
{
wsnode_remove (wsp, p);
@@ -945,8 +1503,131 @@ wordsplit_varexp (struct wordsplit *wsp)
for (p = wsp->ws_head; p;)
{
struct wordsplit_node *next = p->next;
+ if (!(p->flags & (_WSNF_NOEXPAND|_WSNF_DELIM)))
+ if (node_expand (wsp, p, begin_var_