summary refs log tree commit diff about
path: root/src/wordsplit.c
Side-by-side diff
Diffstat (limited to 'src/wordsplit.c') (more/less context) (ignore whitespace changes)
-rw-r--r-- src/wordsplit.c 1646
1 files changed, 1284 insertions, 362 deletions
diff --git a/src/wordsplit.c b/src/wordsplit.c
index f4740bf..bad59b1 100644
--- a/src/wordsplit.c
+++ b/src/wordsplit.c
@@ -1,5 +1,5 @@
/* wordsplit - a word splitter
- Copyright (C) 2009-2014 Sergey Poznyakoff
+ Copyright (C) 2009-2018 Sergey Poznyakoff
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@@ -25,6 +25,8 @@
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
+#include <pwd.h>
+#include <glob.h>
#if ENABLE_NLS
# include <gettext.h>
@@ -48,17 +50,23 @@
#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c))
#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127)
+#define ISVARBEG(c) (ISALPHA(c) || c == '_')
+#define ISVARCHR(c) (ISALNUM(c) || c == '_')
+
+#define WSP_RETURN_DELIMS(wsp) \
+ ((wsp)->ws_flags & WRDSF_RETURN_DELIMS || ((wsp)->ws_options & WRDSO_MAXWORDS))
+
#define ALLOC_INIT 128
#define ALLOC_INCR 128
static void
_wsplt_alloc_die (struct wordsplit *wsp)
{
- wsp->ws_error (_("memory exhausted"));
+ wsp->ws_error ("%s", _("memory exhausted"));
abort ();
}
-static void
+static void
_wsplt_error (const char *fmt, ...)
{
va_list ap;
@@ -72,6 +80,15 @@ _wsplt_error (const char *fmt, ...)
static void wordsplit_free_nodes (struct wordsplit *);
+/* Store the error code EC in WSP->ws_errno and, when the WRDSF_SHOWERR
+   flag is set, report it through wordsplit_perror.  Returns EC so that
+   callers can write `return _wsplt_seterr (wsp, code);'.  */
static int
+_wsplt_seterr (struct wordsplit *wsp, int ec)
+{
+  wsp->ws_errno = ec;
+
+  if ((wsp->ws_flags & WRDSF_SHOWERR) != 0)
+    wordsplit_perror (wsp);
+
+  return ec;
+}
+
+static int
_wsplt_nomem (struct wordsplit *wsp)
{
errno = ENOMEM;
@@ -86,6 +103,84 @@ _wsplt_nomem (struct wordsplit *wsp)
return wsp->ws_errno;
}
+static int wordsplit_run (const char *command, size_t length,
+ struct wordsplit *wsp,
+ int flags, int lvl);
+
+static int wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
+ int flags);
+static int wordsplit_process_list (struct wordsplit *wsp, size_t start);
+static int wordsplit_finish (struct wordsplit *wsp);
+
+/* Run a nested split of STR (LEN bytes) in the auxiliary structure WSS,
+   which inherits its configuration from the parent WSP.
+
+   FLAGS are the WRDSF_ flags for the nested run; the function adds to
+   them the flags describing the members it copies from WSP.
+   If FINALIZE is non-zero, wordsplit_finish is called on WSS and its node
+   list is freed; otherwise the node list is left in WSS for the caller.
+
+   Returns 0 on success, or the non-zero code returned by the step that
+   failed (wordsplit_init, wordsplit_process_list or wordsplit_finish).  */
+static int
+_wsplt_subsplit (struct wordsplit *wsp, struct wordsplit *wss,
+ char const *str, int len,
+ int flags, int finalize)
+{
+ int rc;
+
+ /* Members that are always inherited; the matching flags are set below. */
+ wss->ws_delim = wsp->ws_delim;
+ wss->ws_debug = wsp->ws_debug;
+ wss->ws_error = wsp->ws_error;
+ wss->ws_alloc_die = wsp->ws_alloc_die;
+
+ /* Variable expansion requested: share the parent's environment and
+ variable lookup callback, together with the flags that describe them. */
+ if (!(flags & WRDSF_NOVAR))
+ {
+ wss->ws_env = wsp->ws_env;
+ wss->ws_getvar = wsp->ws_getvar;
+ flags |= wsp->ws_flags & (WRDSF_ENV | WRDSF_ENV_KV | WRDSF_GETVAR);
+ }
+ /* Command substitution requested: share the parent's command callback. */
+ if (!(flags & WRDSF_NOCMD))
+ {
+ wss->ws_command = wsp->ws_command;
+ }
+
+ /* The closure is copied unless both variable and command expansion are
+ disabled. */
+ if ((flags & (WRDSF_NOVAR|WRDSF_NOCMD)) != (WRDSF_NOVAR|WRDSF_NOCMD))
+ {
+ wss->ws_closure = wsp->ws_closure;
+ flags |= wsp->ws_flags & WRDSF_CLOSURE;
+ }
+
+ wss->ws_options = wsp->ws_options;
+
+ /* Declare the members filled in above as valid and propagate the
+ parent's debug/error display and options flags. */
+ flags |= WRDSF_DELIM
+ | WRDSF_ALLOC_DIE
+ | WRDSF_ERROR
+ | WRDSF_DEBUG
+ | (wsp->ws_flags & (WRDSF_SHOWDBG | WRDSF_SHOWERR | WRDSF_OPTIONS));
+
+ rc = wordsplit_init (wss, str, len, flags);
+ if (rc)
+ return rc;
+ /* The nested splitter runs one level deeper than its parent. */
+ wss->ws_lvl = wsp->ws_lvl + 1;
+ rc = wordsplit_process_list (wss, 0);
+ if (rc)
+ {
+ wordsplit_free_nodes (wss);
+ return rc;
+ }
+ if (finalize)
+ {
+ rc = wordsplit_finish (wss);
+ wordsplit_free_nodes (wss);
+ }
+ return rc;
+}
+
+/* Propagate the error state of the nested splitter WSS to its parent WSP.
+   A user error message previously stored in WSP is freed.  If WSS holds a
+   user error (WRDSE_USERERR), ownership of its message moves to WSP and
+   WSS is reset to WRDSE_EOF with no message.  */
+static void
+_wsplt_seterr_sub (struct wordsplit *wsp, struct wordsplit *wss)
+{
+  int sub_errno = wss->ws_errno;
+
+  if (wsp->ws_errno == WRDSE_USERERR)
+    free (wsp->ws_usererr);
+  wsp->ws_errno = sub_errno;
+
+  if (sub_errno != WRDSE_USERERR)
+    return;
+
+  /* Hand the message over to the parent.  */
+  wsp->ws_usererr = wss->ws_usererr;
+  wss->ws_errno = WRDSE_EOF;
+  wss->ws_usererr = NULL;
+}
+
static void
wordsplit_init0 (struct wordsplit *wsp)
{
@@ -93,6 +188,7 @@ wordsplit_init0 (struct wordsplit *wsp)
{
if (!(wsp->ws_flags & WRDSF_APPEND))
wordsplit_free_words (wsp);
+ wordsplit_clearerr (wsp);
}
else
{
@@ -102,9 +198,10 @@ wordsplit_init0 (struct wordsplit *wsp)
}
wsp->ws_errno = 0;
- wsp->ws_head = wsp->ws_tail = NULL;
}
+char wordsplit_c_escape_tab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";
+
static int
wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
int flags)
@@ -116,23 +213,21 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
if (!(wsp->ws_flags & WRDSF_ERROR))
wsp->ws_error = _wsplt_error;
- if (!(wsp->ws_flags & WRDSF_NOVAR)
- && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR)))
+ if (!(wsp->ws_flags & WRDSF_NOVAR))
{
- errno = EINVAL;
- wsp->ws_errno = WRDSE_USAGE;
- if (wsp->ws_flags & WRDSF_SHOWERR)
- wordsplit_perror (wsp);
- return wsp->ws_errno;
+ /* These will be initialized on first variable assignment */
+ wsp->ws_envidx = wsp->ws_envsiz = 0;
+ wsp->ws_envbuf = NULL;
}
if (!(wsp->ws_flags & WRDSF_NOCMD))
{
- errno = EINVAL;
- wsp->ws_errno = WRDSE_NOSUPP;
- if (wsp->ws_flags & WRDSF_SHOWERR)
- wordsplit_perror (wsp);
- return wsp->ws_errno;
+ if (!wsp->ws_command)
+ {
+ _wsplt_seterr (wsp, WRDSE_USAGE);
+ errno = EINVAL;
+ return wsp->ws_errno;
+ }
}
if (wsp->ws_flags & WRDSF_SHOWDBG)
@@ -163,10 +258,42 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
if (!(wsp->ws_flags & WRDSF_CLOSURE))
wsp->ws_closure = NULL;
+ if (!(wsp->ws_flags & WRDSF_OPTIONS))
+ wsp->ws_options = 0;
+
+ if (wsp->ws_flags & WRDSF_ESCAPE)
+ {
+ if (!wsp->ws_escape[WRDSX_WORD])
+ wsp->ws_escape[WRDSX_WORD] = "";
+ if (!wsp->ws_escape[WRDSX_QUOTE])
+ wsp->ws_escape[WRDSX_QUOTE] = "";
+ }
+ else
+ {
+ if (wsp->ws_flags & WRDSF_CESCAPES)
+ {
+ wsp->ws_escape[WRDSX_WORD] = wordsplit_c_escape_tab;
+ wsp->ws_escape[WRDSX_QUOTE] = wordsplit_c_escape_tab;
+ wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD
+ | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD;
+ }
+ else
+ {
+ wsp->ws_escape[WRDSX_WORD] = "";
+ wsp->ws_escape[WRDSX_QUOTE] = "\\\\\"\"";
+ wsp->ws_options |= WRDSO_BSKEEP_QUOTE;
+ }
+ }
+
wsp->ws_endp = 0;
+ wsp->ws_wordi = 0;
+ if (wsp->ws_flags & WRDSF_REUSE)
+ wordsplit_free_nodes (wsp);
+ wsp->ws_head = wsp->ws_tail = NULL;
+
wordsplit_init0 (wsp);
-
+
return 0;
}
@@ -209,6 +336,7 @@ alloc_space (struct wordsplit *wsp, size_t count)
#define _WSNF_NOEXPAND 0x08 /* text is not subject to expansion */
#define _WSNF_JOIN 0x10 /* node must be joined with the next node */
#define _WSNF_SEXP 0x20 /* is a sed expression */
+#define _WSNF_DELIM 0x40 /* node is a delimiter */
#define _WSNF_EMPTYOK 0x0100 /* special flag indicating that
wordsplit_add_segm must add the
@@ -233,7 +361,7 @@ struct wordsplit_node
static const char *
wsnode_flagstr (int flags)
{
- static char retbuf[6];
+ static char retbuf[7];
char *p = retbuf;
if (flags & _WSNF_WORD)
@@ -258,6 +386,10 @@ wsnode_flagstr (int flags)
*p++ = 's';
else
*p++ = '-';
+ if (flags & _WSNF_DELIM)
+ *p++ = 'd';
+ else
+ *p++ = '-';
*p = 0;
return retbuf;
}
@@ -338,6 +470,14 @@ wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node)
node->next = node->prev = NULL;
}
+/* Return the last node of the list starting at P, or NULL if P is NULL.  */
+static struct wordsplit_node *
+wsnode_tail (struct wordsplit_node *p)
+{
+  if (p == NULL)
+    return NULL;
+  for (; p->next != NULL; p = p->next)
+    ;
+  return p;
+}
+
static void
wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node,
struct wordsplit_node *anchor, int before)
@@ -353,22 +493,24 @@ wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node,
wsnode_insert (wsp, node, anchor->prev, 0);
else
{
+ struct wordsplit_node *tail = wsnode_tail (node);
node->prev = NULL;
- node->next = anchor;
- anchor->prev = node;
+ tail->next = anchor;
+ anchor->prev = tail;
wsp->ws_head = node;
}
}
else
{
struct wordsplit_node *p;
+ struct wordsplit_node *tail = wsnode_tail (node);
p = anchor->next;
if (p)
- p->prev = node;
+ p->prev = tail;
else
- wsp->ws_tail = node;
- node->next = p;
+ wsp->ws_tail = tail;
+ tail->next = p;
node->prev = anchor;
anchor->next = node;
}
@@ -415,10 +557,12 @@ wordsplit_dump_nodes (struct wordsplit *wsp)
for (p = wsp->ws_head, n = 0; p; p = p->next, n++)
{
if (p->flags & _WSNF_WORD)
- wsp->ws_debug ("%4d: %p: %#04x (%s):%s;",
+ wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%s;",
+ wsp->ws_lvl,
n, p, p->flags, wsnode_flagstr (p->flags), p->v.word);
else
- wsp->ws_debug ("%4d: %p: %#04x (%s):%.*s;",
+ wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%.*s;",
+ wsp->ws_lvl,
n, p, p->flags, wsnode_flagstr (p->flags),
(int) (p->v.segm.end - p->v.segm.beg),
wsp->ws_input + p->v.segm.beg);
@@ -433,6 +577,9 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
char *buf, *cur;
int stop;
+ if (!(node->flags & _WSNF_JOIN))
+ return 0;
+
for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next)
{
len += wsnode_len (p);
@@ -457,6 +604,7 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
cur += slen;
if (p != node)
{
+ node->flags |= p->flags & _WSNF_QUOTE;
wsnode_remove (wsp, p);
stop = p == end;
wsnode_free (p);
@@ -476,13 +624,14 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
return 0;
}
+static void wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
+ char *dst, const char *src,
+ size_t n);
+
static int
wsnode_quoteremoval (struct wordsplit *wsp)
{
struct wordsplit_node *p;
- void (*uqfn) (char *, const char *, size_t) =
- (wsp->ws_flags & WRDSF_CESCAPES) ?
- wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy;
for (p = wsp->ws_head; p; p = p->next)
{
@@ -491,9 +640,7 @@ wsnode_quoteremoval (struct wordsplit *wsp)
int unquote;
if (wsp->ws_flags & WRDSF_QUOTE)
- {
- unquote = !(p->flags & _WSNF_NOEXPAND);
- }
+ unquote = !(p->flags & _WSNF_NOEXPAND);
else
unquote = 0;
@@ -510,11 +657,8 @@ wsnode_quoteremoval (struct wordsplit *wsp)
p->flags |= _WSNF_WORD;
}
- if (wsp->ws_flags & WRDSF_ESCAPE)
- wordsplit_general_unquote_copy (p->v.word, str, slen,
- wsp->ws_escape);
- else
- uqfn (p->v.word, str, slen);
+ wordsplit_string_unquote_copy (wsp, p->flags & _WSNF_QUOTE,
+ p->v.word, str, slen);
}
}
return 0;
@@ -535,23 +679,160 @@ wsnode_coalesce (struct wordsplit *wsp)
}
+/* Merge node P and all nodes that follow it into a single node.  Every
+   node except the last one is marked _WSNF_JOIN and the run is then
+   passed to coalesce_segment.  Returns 0 on success and 1 if
+   coalesce_segment fails.  Nothing is done if P is the last node.  */
static int
+wsnode_tail_coalesce (struct wordsplit *wsp, struct wordsplit_node *p)
+{
+  struct wordsplit_node *cur;
+
+  if (p->next == NULL)
+    return 0;
+
+  for (cur = p; cur->next != NULL; cur = cur->next)
+    cur->flags |= _WSNF_JOIN;
+
+  return coalesce_segment (wsp, p) ? 1 : 0;
+}
+
+static size_t skip_delim (struct wordsplit *wsp);
+
+static int
wordsplit_finish (struct wordsplit *wsp)
{
struct wordsplit_node *p;
size_t n;
+ int delim;
- n = 0;
+ /* Postprocess delimiters. It would be rather simple, if it weren't for
+ the incremental operation.
- for (p = wsp->ws_head; p; p = p->next)
- n++;
+ Nodes of type _WSNF_DELIM get inserted to the node list if either
+ WRDSF_RETURN_DELIMS flag or WRDSO_MAXWORDS option is set.
+
+ The following cases should be distinguished:
+
+ 1. If both WRDSF_SQUEEZE_DELIMS and WRDSF_RETURN_DELIMS are set, compress
+ any runs of similar delimiter nodes to a single node. The nodes are
+ 'similar' if they point to the same delimiter character.
+
+ If WRDSO_MAXWORDS option is set, stop compressing when
+ ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into
+ a single last node.
+
+ 2. If WRDSO_MAXWORDS option is set, but WRDSF_RETURN_DELIMS is not,
+ remove any delimiter nodes. Stop operation when
+ ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into
+ a single last node.
+
+ 3. If incremental operation is in progress, restart the loop any time
+ a delimiter node is about to be returned, unless WRDSF_RETURN_DELIMS
+ is set.
+ */
+ again:
+ delim = 0; /* Delimiter being processed (if any) */
+ n = 0; /* Number of words processed so far */
+ p = wsp->ws_head; /* Current node */
+
+ while (p)
+ {
+ struct wordsplit_node *next = p->next;
+ if (p->flags & _WSNF_DELIM)
+ {
+ if (wsp->ws_flags & WRDSF_RETURN_DELIMS)
+ {
+ if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS)
+ {
+ char const *s = wsnode_ptr (wsp, p);
+ if (delim)
+ {
+ if (delim == *s)
+ {
+ wsnode_remove (wsp, p);
+ p = next;
+ continue;
+ }
+ else
+ {
+ delim = 0;
+ n++; /* Count this node; it will be returned */
+ }
+ }
+ else
+ {
+ delim = *s;
+ p = next;
+ continue;
+ }
+ }
+ }
+ else if (wsp->ws_options & WRDSO_MAXWORDS)
+ {
+ wsnode_remove (wsp, p);
+ p = next;
+ continue;
+ }
+ }
+ else
+ {
+ if (delim)
+ {
+ /* Last node was a delimiter or a compressed run of delimiters;
+ Count it, and clear the delimiter marker */
+ n++;
+ delim = 0;
+ }
+ if (wsp->ws_options & WRDSO_MAXWORDS)
+ {
+ if (wsp->ws_wordi + n + 1 == wsp->ws_maxwords)
+ break;
+ }
+ }
+ n++;
+ if (wsp->ws_flags & WRDSF_INCREMENTAL)
+ p = NULL; /* Break the loop */
+ else
+ p = next;
+ }
+
+ if (p)
+ {
+ /* We're here if WRDSO_MAXWORDS is in effect and wsp->ws_maxwords
+ words have already been collected. Reconstruct a single final
+ node from the remaining nodes. */
+ if (wsnode_tail_coalesce (wsp, p))
+ return wsp->ws_errno;
+ n++;
+ }
+
+ if (n == 0 && (wsp->ws_flags & WRDSF_INCREMENTAL))
+ {
+ /* The loop above has eliminated all nodes. Restart the
+ processing if there is any input left. */
+ if (wsp->ws_endp < wsp->ws_len)
+ {
+ int rc;
+ if (wsp->ws_flags & WRDSF_SHOWDBG)
+ wsp->ws_debug (_("Restarting"));
+ rc = wordsplit_process_list (wsp, skip_delim (wsp));
+ if (rc)
+ return rc;
+ }
+      else
+        {
+          /* No input left.  ws_error is the error-reporting callback (it
+             is called as a function elsewhere in this file); the status
+             code belongs in ws_errno.  */
+          wsp->ws_errno = WRDSE_EOF;
+          return WRDSE_EOF;
+        }
+ goto again;
+ }
if (alloc_space (wsp, n + 1))
- return 1;
+ return wsp->ws_errno;
- for (p = wsp->ws_head; p; p = p->next)
+ while (wsp->ws_head)
{
- const char *str = wsnode_ptr (wsp, p);
- size_t slen = wsnode_len (p);
+ const char *str = wsnode_ptr (wsp, wsp->ws_head);
+ size_t slen = wsnode_len (wsp->ws_head);
char *newstr = malloc (slen + 1);
/* Assign newstr first, even if it is NULL. This way
@@ -563,14 +844,47 @@ wordsplit_finish (struct wordsplit *wsp)
memcpy (newstr, str, slen);
newstr[slen] = 0;
+ wsnode_remove (wsp, wsp->ws_head);
+
wsp->ws_wordc++;
+ wsp->ws_wordi++;
+ if (wsp->ws_flags & WRDSF_INCREMENTAL)
+ break;
}
wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
return 0;
}
+/* Append copies of the ARGC strings in ARGV to the word array of WSP and
+   re-terminate the array with NULL.  Returns 0 on success, the result of
+   alloc_space if the array cannot be grown, or the result of _wsplt_nomem
+   if a string cannot be duplicated; in the latter case the copies made so
+   far are freed and ws_wordc is left unchanged.  */
+int
+wordsplit_append (wordsplit_t *wsp, int argc, char **argv)
+{
+  size_t n;
+  size_t base;
+  int rc = alloc_space (wsp, wsp->ws_wordc + argc + 1);
+
+  if (rc)
+    return rc;
+
+  /* Index of the first free slot.  */
+  base = wsp->ws_offs + wsp->ws_wordc;
+
+  for (n = 0; n < argc; n++)
+    {
+      char *copy = strdup (argv[n]);
+
+      if (copy == NULL)
+        {
+          /* Undo the copies stored so far, newest first.  */
+          while (n > 0)
+            {
+              n--;
+              free (wsp->ws_wordv[base + n]);
+              wsp->ws_wordv[base + n] = NULL;
+            }
+          return _wsplt_nomem (wsp);
+        }
+      wsp->ws_wordv[base + n] = copy;
+    }
+
+  wsp->ws_wordc += n;
+  wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
+  return 0;
+}
+
/* Variable expansion */
static int
node_split_prefix (struct wordsplit *wsp,
@@ -607,10 +921,10 @@ node_split_prefix (struct wordsplit *wsp,
}
static int
-find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff)
+find_closing_paren (const char *str, size_t i, size_t len, size_t *poff,
+ char const *paren)
{
- enum
- { st_init, st_squote, st_dquote } state = st_init;
+ enum { st_init, st_squote, st_dquote } state = st_init;
size_t level = 1;
for (; i < len; i++)
@@ -620,18 +934,23 @@ find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff)
case st_init:
switch (str[i])
{
- case '{':
- level++;
- break;
-
- case '}':
- if (--level == 0)
+ default:
+ if (str[i] == paren[0])
+ {
+ level++;
+ break;
+ }
+ else if (str[i] == paren[1])
{
- *poff = i;
- return 0;
+ if (--level == 0)
+ {
+ *poff = i;
+ return 0;
+ }
+ break;
}
break;
-
+
case '"':
state = st_dquote;
break;
@@ -658,13 +977,14 @@ find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff)
return 1;
}
-static const char *
-wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len)
+static int
+wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len,
+ char const **ret)
{
size_t i;
if (!(wsp->ws_flags & WRDSF_ENV))
- return NULL;
+ return WRDSE_UNDEF;
if (wsp->ws_flags & WRDSF_ENV_KV)
{
@@ -673,14 +993,17 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len)
{
size_t elen = strlen (wsp->ws_env[i]);
if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0)
- return wsp->ws_env[i + 1];
+ {
+ *ret = wsp->ws_env[i + 1];
+ return WRDSE_OK;
+ }
/* Skip the value. Break the loop if it is NULL. */
i++;
if (wsp->ws_env[i] == NULL)
break;
}
}
- else
+ else if (wsp->ws_env)
{
/* Usual (A=B) environment. */
for (i = 0; wsp->ws_env[i]; i++)
@@ -692,10 +1015,117 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len)
if (name[j] != var[j])
break;
if (j == len && var[j] == '=')
- return var + j + 1;
+ {
+ *ret = var + j + 1;
+ return WRDSE_OK;
+ }
}
}
- return NULL;
+ return WRDSE_UNDEF;
+}
+
+/* Add the variable NAME (NAMELEN bytes, need not be NUL-terminated) with
+   the value VALUE to the environment of WSP.  VALUE is copied; the caller
+   keeps ownership of it.
+
+   On first use the environment is moved to a private buffer
+   (ws_envbuf): if WRDSF_ENV is set, the existing ws_env entries are
+   duplicated into it, otherwise an empty buffer of WORDSPLIT_ENV_INIT
+   slots is created and WRDSF_ENV is turned on.  The buffer is doubled
+   whenever it cannot hold the new entry plus the terminating NULL.
+
+   With WRDSF_ENV_KV the variable occupies two slots (name, value),
+   otherwise one slot of the form "NAME=VALUE".  ws_envidx always indexes
+   the terminating NULL, so that the next assignment overwrites it and
+   stays visible to lookups that stop at the first NULL.
+
+   Returns WRDSE_OK on success, or the result of _wsplt_nomem on
+   allocation failure.  */
+static int
+wsplt_assign_var (struct wordsplit *wsp, const char *name, size_t namelen,
+                  char *value)
+{
+  int n = (wsp->ws_flags & WRDSF_ENV_KV) ? 2 : 1;
+  char *v;
+
+  if (wsp->ws_envidx + n >= wsp->ws_envsiz)
+    {
+      size_t sz;
+      char **newenv;
+
+      if (!wsp->ws_envbuf)
+        {
+          if (wsp->ws_flags & WRDSF_ENV)
+            {
+              size_t i = 0, j;
+
+              if (wsp->ws_env)
+                {
+                  for (; wsp->ws_env[i]; i++)
+                    ;
+                }
+
+              sz = i + n + 1;
+
+              newenv = calloc (sz, sizeof (newenv[0]));
+              if (!newenv)
+                return _wsplt_nomem (wsp);
+
+              for (j = 0; j < i; j++)
+                {
+                  newenv[j] = strdup (wsp->ws_env[j]);
+                  if (!newenv[j])
+                    {
+                      /* Release the copies made so far (none if the very
+                         first one failed) and the array itself.  */
+                      while (j > 0)
+                        free (newenv[--j]);
+                      free (newenv);
+                      return _wsplt_nomem (wsp);
+                    }
+                }
+              newenv[j] = NULL;
+
+              wsp->ws_envbuf = newenv;
+              wsp->ws_envidx = i;
+              wsp->ws_envsiz = sz;
+              wsp->ws_env = (const char**) wsp->ws_envbuf;
+            }
+          else
+            {
+              newenv = calloc (WORDSPLIT_ENV_INIT, sizeof (newenv[0]));
+              if (!newenv)
+                return _wsplt_nomem (wsp);
+              wsp->ws_envbuf = newenv;
+              wsp->ws_envidx = 0;
+              wsp->ws_envsiz = WORDSPLIT_ENV_INIT;
+              wsp->ws_env = (const char**) wsp->ws_envbuf;
+              wsp->ws_flags |= WRDSF_ENV;
+            }
+        }
+      else
+        {
+          /* Record the new size only after realloc has succeeded, so that
+             a failure leaves ws_envsiz matching the real buffer.  */
+          sz = wsp->ws_envsiz * 2;
+          newenv = realloc (wsp->ws_envbuf, sz * sizeof (wsp->ws_envbuf[0]));
+          if (!newenv)
+            return _wsplt_nomem (wsp);
+          wsp->ws_envbuf = newenv;
+          wsp->ws_envsiz = sz;
+          wsp->ws_env = (const char**) wsp->ws_envbuf;
+        }
+    }
+
+  if (wsp->ws_flags & WRDSF_ENV_KV)
+    {
+      /* A key-value pair environment */
+      char *p = malloc (namelen + 1);
+      if (!p)
+        return _wsplt_nomem (wsp);
+      memcpy (p, name, namelen);
+      p[namelen] = 0;
+
+      v = strdup (value);
+      if (!v)
+        {
+          free (p);
+          return _wsplt_nomem (wsp);
+        }
+      wsp->ws_env[wsp->ws_envidx++] = p;
+      wsp->ws_env[wsp->ws_envidx++] = v;
+    }
+  else
+    {
+      v = malloc (namelen + strlen (value) + 2);
+      if (!v)
+        return _wsplt_nomem (wsp);
+      memcpy (v, name, namelen);
+      v[namelen++] = '=';
+      strcpy (v + namelen, value);
+      wsp->ws_env[wsp->ws_envidx++] = v;
+    }
+  /* Terminate the list without advancing ws_envidx: the next assignment
+     must overwrite this NULL rather than be stored after it.  */
+  wsp->ws_env[wsp->ws_envidx] = NULL;
+  return WRDSE_OK;
+}
static int
@@ -704,15 +1134,17 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
{
size_t i = 0;
const char *defstr = NULL;
- const char *value;
+ char *value;
const char *vptr;
struct wordsplit_node *newnode;
const char *start = str - 1;
-
- if (ISALPHA (str[0]) || str[0] == '_')
+ int rc;
+ struct wordsplit ws;
+
+ if (ISVARBEG (str[0]))
{
for (i = 1; i < len; i++)
- if (!(ISALNUM (str[i]) || str[i] == '_'))
+ if (!ISVARCHR (str[i]))
break;
*pend = str + i - 1;
}
@@ -721,30 +1153,36 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
str++;
len--;
for (i = 1; i < len; i++)
- if (str[i] == '}' || str[i] == ':')
- break;
- if (str[i] == ':')
{
- size_t j;
-
- defstr = str + i + 1;
- if (find_closing_cbrace (str, i + 1, len, &j))
+ if (str[i] == ':')
{
- wsp->ws_errno = WRDSE_CBRACE;
- return 1;
+ size_t j;
+
+ defstr = str + i + 1;
+ if (find_closing_paren (str, i + 1, len, &j, "{}"))
+ return _wsplt_seterr (wsp, WRDSE_CBRACE);
+ *pend = str + j;
+ break;
+ }
+ else if (str[i] == '}')
+ {
+ defstr = NULL;
+ *pend = str + i;
+ break;
+ }
+ else if (strchr ("-+?=", str[i]))
+ {
+ size_t j;
+
+ defstr = str + i;
+ if (find_closing_paren (str, i, len, &j, "{}"))
+ return _wsplt_seterr (wsp, WRDSE_CBRACE);
+ *pend = str + j;
+ break;
}
- *pend = str + j;
- }
- else if (str[i] == '}')
- {
- defstr = NULL;
- *pend = str + i;
- }
- else
- {
- wsp->ws_errno = WRDSE_CBRACE;
- return 1;
}
+ if (i == len)
+ return _wsplt_seterr (wsp, WRDSE_CBRACE);
}
else
{
@@ -768,32 +1206,144 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
i - its length
defstr - default replacement str */
- vptr = wordsplit_find_env (wsp, str, i);
- if (vptr)
+ if (defstr && strchr("-+?=", defstr[0]) == 0)
{
- value = strdup (vptr);
- if (!value)
- return _wsplt_nomem (wsp);
+ rc = WRDSE_UNDEF;
+ defstr = NULL;
}
- else if (wsp->ws_flags & WRDSF_GETVAR)
- value = wsp->ws_getvar (str, i, wsp->ws_closure);
- else if (wsp->ws_flags & WRDSF_UNDEF)
+ else
{
- wsp->ws_errno = WRDSE_UNDEF;
- if (wsp->ws_flags & WRDSF_SHOWERR)
- wordsplit_perror (wsp);
- return 1;
+ rc = wordsplit_find_env (wsp, str, i, &vptr);
+ if (rc == WRDSE_OK)
+ {
+ if (vptr)
+ {
+ value = strdup (vptr);
+ if (!value)
+ rc = WRDSE_NOSPACE;
+ }
+ else
+ rc = WRDSE_UNDEF;
+ }
+ else if (wsp->ws_flags & WRDSF_GETVAR)
+ rc = wsp->ws_getvar (&value, str, i, wsp->ws_closure);
+ else
+ rc = WRDSE_UNDEF;
+
+ if (rc == WRDSE_OK
+ && (!value || value[0] == 0)
+ && defstr && defstr[-1] == ':')
+ {
+ free (value);
+ rc = WRDSE_UNDEF;
+ }
}
- else
+
+ switch (rc)
{
- if (wsp->ws_flags & WRDSF_WARNUNDEF)
- wsp->ws_error (_("warning: undefined variable `%.*s'"), (int) i, str);
- if (wsp->ws_flags & WRDSF_KEEPUNDEF)
- value = NULL;
+ case WRDSE_OK:
+ if (defstr && *defstr == '+')
+ {
+ size_t size = *pend - ++defstr;
+
+ rc = _wsplt_subsplit (wsp, &ws, defstr, size,
+ WRDSF_NOSPLIT | WRDSF_WS | WRDSF_QUOTE |
+ (wsp->ws_flags &
+ (WRDSF_NOVAR | WRDSF_NOCMD)), 1);
+ if (rc)
+ return rc;
+ free (value);
+ value = ws.ws_wordv[0];
+ ws.ws_wordv[0] = NULL;
+ wordsplit_free (&ws);
+ }
+ break;
+
+ case WRDSE_UNDEF:
+ if (defstr)
+ {
+ size_t size;
+ if (*defstr == '-' || *defstr == '=')
+ {
+ size = *pend - ++defstr;
+
+ rc = _wsplt_subsplit (wsp, &ws, defstr, size,
+ WRDSF_NOSPLIT | WRDSF_WS | WRDSF_QUOTE |
+ (wsp->ws_flags &
+ (WRDSF_NOVAR | WRDSF_NOCMD)),
+ 1);
+ if (rc)
+ return rc;
+
+ value = ws.ws_wordv[0];
+ ws.ws_wordv[0] = NULL;
+ wordsplit_free (&ws);
+
+ if (defstr[-1] == '=')
+ wsplt_assign_var (wsp, str, i, value);
+ }
+ else
+ {
+ if (*defstr == '?')
+ {
+ size = *pend - ++defstr;
+ if (size == 0)
+ wsp->ws_error (_("%.*s: variable null or not set"),
+ (int) i, str);
+ else
+ {
+ rc = _wsplt_subsplit (wsp, &ws, defstr, size,
+ WRDSF_NOSPLIT | WRDSF_WS |
+ WRDSF_QUOTE |
+ (wsp->ws_flags &
+ (WRDSF_NOVAR | WRDSF_NOCMD)),
+ 1);
+ if (rc == 0)
+ wsp->ws_error ("%.*s: %s",
+ (int) i, str, ws.ws_wordv[0]);
+ else
+ wsp->ws_error ("%.*s: %.*s",
+ (int) i, str, (int) size, defstr);
+ wordsplit_free (&ws);
+ }
+ }
+ value = NULL;
+ }
+ }
+ else if (wsp->ws_flags & WRDSF_UNDEF)
+ {
+ _wsplt_seterr (wsp, WRDSE_UNDEF);
+ return 1;
+ }
else
- value = "";
+ {
+ if (wsp->ws_flags & WRDSF_WARNUNDEF)
+ wsp->ws_error (_("warning: undefined variable `%.*s'"),
+ (int) i, str);
+ if (wsp->ws_flags & WRDSF_KEEPUNDEF)
+ value = NULL;
+ else
+ {
+ value = strdup ("");
+ if (!value)
+ return _wsplt_nomem (wsp);
+ }
+ }
+ break;
+
+ case WRDSE_NOSPACE:
+ return _wsplt_nomem (wsp);
+
+ case WRDSE_USERERR:
+ if (wsp->ws_errno == WRDSE_USERERR)
+ free (wsp->ws_usererr);
+ wsp->ws_usererr = value;
+ /* fall through */
+ default:
+ _wsplt_seterr (wsp, rc);
+ return 1;
}
- /* FIXME: handle defstr */
+
if (value)
{
if (flg & _WSNF_QUOTE)
@@ -803,12 +1353,11 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
wsnode_insert (wsp, newnode, *ptail, 0);
*ptail = newnode;
newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
- newnode->v.word = strdup (value);
- if (!newnode->v.word)
- return _wsplt_nomem (wsp);
+ newnode->v.word = value;
}
else if (*value == 0)
{
+ free (value);
/* Empty string is a special case */
if (wsnode_new (wsp, &newnode))
return 1;
@@ -819,28 +1368,23 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
else
{
struct wordsplit ws;
- int i;
-
- ws.ws_delim = wsp->ws_delim;
- if (wordsplit (value, &ws,
- WRDSF_NOVAR | WRDSF_NOCMD | WRDSF_DELIM | WRDSF_WS))
+ int rc;
+
+ rc = _wsplt_subsplit (wsp, &ws, value, strlen (value),
+ WRDSF_NOVAR | WRDSF_NOCMD |
+ WRDSF_QUOTE
+ | (WSP_RETURN_DELIMS (wsp) ? WRDSF_RETURN_DELIMS : 0) ,
+ 0);
+ free (value);
+ if (rc)
{
+ _wsplt_seterr_sub (wsp, &ws);
wordsplit_free (&ws);
return 1;
}
- for (i = 0; i < ws.ws_wordc; i++)
- {
- if (wsnode_new (wsp, &newnode))
- return 1;
- wsnode_insert (wsp, newnode, *ptail, 0);
- *ptail = newnode;
- newnode->flags = _WSNF_WORD |
- _WSNF_NOEXPAND |
- (i + 1 < ws.ws_wordc ? (flg & ~_WSNF_JOIN) : flg);
- newnode->v.word = strdup (ws.ws_wordv[i]);
- if (!newnode->v.word)
- return _wsplt_nomem (wsp);
- }
+ wsnode_insert (wsp, ws.ws_head, *ptail, 0);
+ *ptail = ws.ws_tail;
+ ws.ws_head = ws.ws_tail = NULL;
wordsplit_free (&ws);
}
}
@@ -871,7 +1415,19 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
}
static int
-node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node)
+begin_var_p (int c)
+{
+ return c == '{' || ISVARBEG (c);
+}
+
+static int
+node_expand (struct wordsplit *wsp, struct wordsplit_node *node,
+ int (*beg_p) (int),
+ int (*ws_exp_fn) (struct wordsplit *wsp,
+ const char *str, size_t len,
+ struct wordsplit_node **ptail,
+ const char **pend,
+ int flg))
{
const char *str = wsnode_ptr (wsp, node);
size_t slen = wsnode_len (node);
@@ -887,7 +1443,7 @@ node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node)
p++;
continue;
}
- if (*p == '$')
+ if (*p == '$' && beg_p (p[1]))
{
size_t n = p - str;
@@ -896,8 +1452,8 @@ node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node)
if (node_split_prefix (wsp, &tail, node, off, n, _WSNF_JOIN))
return 1;
p++;
- if (expvar (wsp, p, slen - n, &tail, &p,
- node->flags & (_WSNF_JOIN | _WSNF_QUOTE)))
+ if (ws_exp_fn (wsp, p, slen - n, &tail, &p,
+ node->flags & (_WSNF_JOIN | _WSNF_QUOTE)))
return 1;
off += p - str + 1;
str = p + 1;
@@ -908,7 +1464,7 @@ node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node)
if (tail != node)
tail->flags |= _WSNF_JOIN;
if (node_split_prefix (wsp, &tail, node, off, p - str,
- node->flags & _WSNF_JOIN))
+ node->flags & (_WSNF_JOIN|_WSNF_QUOTE)))
return 1;
}
if (tail != node)
@@ -918,8 +1474,8 @@ node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node)
}
return 0;
}
-
-/* Remove NULL lists */
+
+/* Remove NULL nodes from the list */
static void
wsnode_nullelim (struct wordsplit *wsp)
{
@@ -928,6 +1484,8 @@ wsnode_nullelim (struct wordsplit *wsp)
for (p = wsp->ws_head; p;)
{
struct wordsplit_node *next = p->next;
+ if (p->flags & _WSNF_DELIM && p->prev)
+ p->prev->flags &= ~_WSNF_JOIN;
if (p->flags & _WSNF_NULL)
{
wsnode_remove (wsp, p);
@@ -945,8 +1503,131 @@ wordsplit_varexp (struct wordsplit *wsp)
for (p = wsp->ws_head; p;)
{
struct wordsplit_node *next = p->next;
+ if (!(p->flags & (_WSNF_NOEXPAND|_WSNF_DELIM)))
+ if (node_expand (wsp, p, begin_var_p, expvar))
+ return 1;
+ p = next;
+ }
+
+ wsnode_nullelim (wsp);
+ return 0;
+}
+
+/* Return non-zero if C, the character following a `$', opens a command
+   substitution, i.e. if it is an opening parenthesis.  */
+static int
+begin_cmd_p (int c)
+{
+  return '(' == c;
+}
+
+/* Expand one command substitution.
+
+   STR points at the opening parenthesis that follows `$' and LEN is the
+   number of bytes available from there.  The text up to the matching
+   closing parenthesis is split into words and handed to the ws_command
+   callback, which returns the replacement text in an allocated string.
+
+   The replacement is linked into the node list after *PTAIL, which is
+   advanced to the last inserted node:
+     - if FLG has _WSNF_QUOTE, as a single word node that takes ownership
+       of the string;
+     - if it is empty or NULL, as a _WSNF_NULL node;
+     - otherwise it is split again (no variable or command expansion) and
+       the resulting node list is spliced in.
+   *PEND is set to the closing parenthesis.
+
+   Returns 0 on success; on failure returns non-zero with the error code
+   stored in WSP.  */
+static int
+expcmd (struct wordsplit *wsp, const char *str, size_t len,
+        struct wordsplit_node **ptail, const char **pend, int flg)
+{
+  int rc;
+  size_t j;
+  char *value;
+  struct wordsplit_node *newnode;
+  struct wordsplit ws;
+
+  /* Step over the opening parenthesis.  */
+  str++;
+  len--;
+
+  if (find_closing_paren (str, 0, len, &j, "()"))
+    {
+      _wsplt_seterr (wsp, WRDSE_PAREN);
+      return 1;
+    }
+
+  *pend = str + j;
+  rc = _wsplt_subsplit (wsp, &ws, str, j, WRDSF_WS | WRDSF_QUOTE, 1);
+  if (rc)
+    {
+      _wsplt_seterr_sub (wsp, &ws);
+      wordsplit_free (&ws);
+      return 1;
+    }
+  rc = wsp->ws_command (&value, str, j, ws.ws_wordv, wsp->ws_closure);
+  wordsplit_free (&ws);
+
+  if (rc == WRDSE_NOSPACE)
+    return _wsplt_nomem (wsp);
+  else if (rc)
+    {
+      if (rc == WRDSE_USERERR)
+        {
+          /* The callback returned its error message in VALUE.  */
+          if (wsp->ws_errno == WRDSE_USERERR)
+            free (wsp->ws_usererr);
+          wsp->ws_usererr = value;
+        }
+      _wsplt_seterr (wsp, rc);
+      return 1;
+    }
+
+  if (value)
+    {
+      if (flg & _WSNF_QUOTE)
+        {
+          if (wsnode_new (wsp, &newnode))
+            {
+              /* No node will take ownership of VALUE; release it.  */
+              free (value);
+              return 1;
+            }
+          wsnode_insert (wsp, newnode, *ptail, 0);
+          *ptail = newnode;
+          newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
+          newnode->v.word = value;
+        }
+      else if (*value == 0)
+        {
+          free (value);
+          /* Empty string is a special case */
+          if (wsnode_new (wsp, &newnode))
+            return 1;
+          wsnode_insert (wsp, newnode, *ptail, 0);
+          *ptail = newnode;
+          newnode->flags = _WSNF_NULL;
+        }
+      else
+        {
+          /* Separate structure for splitting the command output; named
+             differently from WS above to avoid shadowing it.  */
+          struct wordsplit sub;
+
+          rc = _wsplt_subsplit (wsp, &sub, value, strlen (value),
+                                WRDSF_NOVAR | WRDSF_NOCMD
+                                | WRDSF_WS | WRDSF_QUOTE
+                                | (WSP_RETURN_DELIMS (wsp) ? WRDSF_RETURN_DELIMS : 0),
+                                0);
+          free (value);
+          if (rc)
+            {
+              _wsplt_seterr_sub (wsp, &sub);
+              wordsplit_free (&sub);
+              return 1;
+            }
+          /* Move the whole node list of SUB into WSP.  */
+          wsnode_insert (wsp, sub.ws_head, *ptail, 0);
+          *ptail = sub.ws_tail;
+          sub.ws_head = sub.ws_tail = NULL;
+          wordsplit_free (&sub);
+        }
+    }
+  else
+    {
+      if (wsnode_new (wsp, &newnode))
+        return 1;
+      wsnode_insert (wsp, newnode, *ptail, 0);
+      *ptail = newnode;
+      newnode->flags = _WSNF_NULL;
+    }
+  return 0;
+}
+
+static int
+wordsplit_cmdexp (struct wordsplit *wsp)
+{
+ struct wordsplit_node *p;
+
+ for (p = wsp->ws_head; p;)
+ {
+ struct wordsplit_node *next = p->next;
if (!(p->flags & _WSNF_NOEXPAND))
- if (node_expand_vars (wsp, p))
+ if (node_expand (wsp, p, begin_cmd_p, expcmd))
return 1;
p = next;
}
@@ -958,7 +1639,7 @@ wordsplit_varexp (struct wordsplit *wsp)
/* Strip off any leading and trailing whitespace. This function is called
right after the initial scanning, therefore it assumes that every
node in the list is a text reference node. */
-static void
+static int
wordsplit_trimws (struct wordsplit *wsp)
{
struct wordsplit_node *p;
@@ -967,14 +1648,21 @@ wordsplit_trimws (struct wordsplit *wsp)
{
size_t n;
+ if (!(p->flags & _WSNF_QUOTE))
+ {
+ /* Skip leading whitespace: */
+ for (n = p->v.segm.beg; n < p->v.segm.end && ISWS (wsp->ws_input[n]);
+ n++)
+ ;
+ p->v.segm.beg = n;
+ }
+
+ while (p->next && (p->flags & _WSNF_JOIN))
+ p = p->next;
+
if (p->flags & _WSNF_QUOTE)
continue;
-
- /* Skip leading whitespace: */
- for (n = p->v.segm.beg; n < p->v.segm.end && ISWS (wsp->ws_input[n]);
- n++)
- ;
- p->v.segm.beg = n;
+
/* Trim trailing whitespace */
for (n = p->v.segm.end;
n > p->v.segm.beg && ISWS (wsp->ws_input[n - 1]); n--);
@@ -984,6 +1672,196 @@ wordsplit_trimws (struct wordsplit *wsp)
}
wsnode_nullelim (wsp);
+ return 0;
+}
+
+/* Perform tilde expansion on the node list of WSP.
+
+   Every unquoted node whose text begins with `~' and contains a slash is
+   replaced with a word node in which the `~' or `~USER' prefix is
+   substituted by the home directory of the current user or of USER,
+   respectively.  The slash and everything after it are kept.  Nodes
+   without a slash, and nodes naming an unknown user, are left unchanged.
+
+   Returns 0 on success, or the result of _wsplt_nomem on allocation
+   failure.  */
+static int
+wordsplit_tildexpand (struct wordsplit *wsp)
+{
+  struct wordsplit_node *p;
+  char *uname = NULL;
+  size_t usize = 0;
+
+  for (p = wsp->ws_head; p; p = p->next)
+    {
+      const char *str;
+      size_t i, slen, dlen, taillen;
+      struct passwd *pw;
+      char *newstr;
+
+      if (p->flags & _WSNF_QUOTE)
+        continue;
+
+      str = wsnode_ptr (wsp, p);
+      slen = wsnode_len (p);
+      if (slen == 0 || str[0] != '~')
+        continue;
+
+      /* Find the slash that ends the tilde prefix.  */
+      for (i = 1; i < slen && str[i] != '/'; i++)
+        ;
+      if (i == slen)
+        continue;
+
+      /* Here str[i] is the slash; the user name, if any, is
+         str[1] .. str[i-1].  */
+      if (i > 1)
+        {
+          if (i > usize)
+            {
+              char *newbuf = realloc (uname, i);
+              if (!newbuf)
+                {
+                  free (uname);
+                  return _wsplt_nomem (wsp);
+                }
+              uname = newbuf;
+              usize = i;
+            }
+          memcpy (uname, str + 1, i - 1);
+          uname[i - 1] = 0;
+          pw = getpwnam (uname);
+        }
+      else
+        pw = getpwuid (getuid ());
+
+      if (!pw)
+        continue;
+
+      /* Build "<home directory><slash and the rest>".  The tail is
+         taken from the slash itself in both cases, so `~/x' keeps the
+         separator just as `~user/x' does.  */
+      dlen = strlen (pw->pw_dir);
+      taillen = slen - i;
+      newstr = malloc (dlen + taillen + 1);
+      if (!newstr)
+        {
+          free (uname);
+          return _wsplt_nomem (wsp);
+        }
+
+      memcpy (newstr, pw->pw_dir, dlen);
+      memcpy (newstr + dlen, str + i, taillen);
+      newstr[dlen + taillen] = 0;
+
+      /* STR may point into the old word; free it only after copying.  */
+      if (p->flags & _WSNF_WORD)
+        free (p->v.word);
+      p->v.word = newstr;
+      p->flags |= _WSNF_WORD;
+    }
+  free (uname);
+  return 0;
+}
+
+/* Return 1 if the first L bytes of S contain a globbing metacharacter
+   (`*', `?' or `['), and 0 otherwise.  A NUL byte is not a metacharacter,
+   and a non-positive L yields 0.  */
+static int
+isglob (const char *s, int l)
+{
+  for (; l > 0; l--, s++)
+    {
+      /* Compare explicitly: strchr would also match the terminating
+         NUL of the character set.  */
+      if (*s == '*' || *s == '?' || *s == '[')
+        return 1;
+    }
+  return 0;
+}
+
+/* Perform pathname expansion on the node list of WSP.
+
+   Every unquoted node containing a globbing metacharacter is passed to
+   glob(3).  On a match the node is replaced by one quoted word node per
+   matching file name.  If nothing matches, the node is removed when
+   WRDSO_NULLGLOB is set, a WRDSE_USERERR error is raised when
+   WRDSO_FAILGLOB is set, and the node is kept as is otherwise.
+   WRDSO_DOTGLOB enables GLOB_PERIOD where the system provides it.
+
+   Returns 0 on success and non-zero on failure.  The pattern buffer is
+   released on every exit path, and the glob result and the pending
+   string copy on the paths that follow a successful glob call.  */
+static int
+wordsplit_pathexpand (struct wordsplit *wsp)
+{
+  struct wordsplit_node *p, *next;
+  char *pattern = NULL;
+  size_t patsize = 0;
+  size_t slen;
+  int flags = 0;
+
+#ifdef GLOB_PERIOD
+  if (wsp->ws_options & WRDSO_DOTGLOB)
+    flags = GLOB_PERIOD;
+#endif
+
+  for (p = wsp->ws_head; p; p = next)
+    {
+      const char *str;
+
+      /* Saved up front: P may be removed from the list below.  */
+      next = p->next;
+
+      if (p->flags & _WSNF_QUOTE)
+        continue;
+
+      str = wsnode_ptr (wsp, p);
+      slen = wsnode_len (p);
+
+      if (isglob (str, slen))
+        {
+          size_t i;
+          glob_t g;
+          struct wordsplit_node *prev;
+
+          if (slen + 1 > patsize)
+            {
+              char *newpat = realloc (pattern, slen + 1);
+              if (!newpat)
+                {
+                  /* realloc left the old buffer intact; release it.  */
+                  free (pattern);
+                  return _wsplt_nomem (wsp);
+                }
+              pattern = newpat;
+              patsize = slen + 1;
+            }
+          memcpy (pattern, str, slen);
+          pattern[slen] = 0;
+
+          switch (glob (pattern, flags, NULL, &g))
+            {
+            case 0:
+              break;
+
+            case GLOB_NOSPACE:
+              free (pattern);
+              return _wsplt_nomem (wsp);
+
+            case GLOB_NOMATCH:
+              if (wsp->ws_options & WRDSO_NULLGLOB)
+                {
+                  wsnode_remove (wsp, p);
+                  wsnode_free (p);
+                }
+              else if (wsp->ws_options & WRDSO_FAILGLOB)
+                {
+                  char buf[128];
+                  if (wsp->ws_errno == WRDSE_USERERR)
+                    free (wsp->ws_usererr);
+                  snprintf (buf, sizeof (buf), _("no files match pattern %s"),
+                            pattern);
+                  free (pattern);
+                  wsp->ws_usererr = strdup (buf);
+                  if (!wsp->ws_usererr)
+                    return _wsplt_nomem (wsp);
+                  else
+                    return _wsplt_seterr (wsp, WRDSE_USERERR);
+                }
+              continue;
+
+            default:
+              free (pattern);
+              return _wsplt_seterr (wsp, WRDSE_GLOBERR);
+            }
+
+          /* Insert the matches after P, preserving their order.  */
+          prev = p;
+          for (i = 0; i < g.gl_pathc; i++)
+            {
+              struct wordsplit_node *newnode;
+              char *newstr = strdup (g.gl_pathv[i]);
+
+              if (!newstr)
+                {
+                  globfree (&g);
+                  free (pattern);
+                  return _wsplt_nomem (wsp);
+                }
+              if (wsnode_new (wsp, &newnode))
+                {
+                  free (newstr);
+                  globfree (&g);
+                  free (pattern);
+                  return 1;
+                }
+              newnode->v.word = newstr;
+              newnode->flags |= _WSNF_WORD|_WSNF_QUOTE;
+              wsnode_insert (wsp, newnode, prev, 0);
+              prev = newnode;
+            }
+          globfree (&g);
+
+          /* The pattern node itself is no longer needed.  */
+          wsnode_remove (wsp, p);
+          wsnode_free (p);
+        }
+    }
+  free (pattern);
+  return 0;
+}
static int
@@ -1019,33 +1897,24 @@ skip_sed_expr (const char *command, size_t i, size_t len)
return i;
}
-static size_t
-skip_delim (struct wordsplit *wsp)
+/* Compute the offset at which scanning should resume.  wsp->ws_endp is
+ expected to index a delimiter character.  If RETURN_DELIMS is non-zero,
+ return that index unchanged, so that the next scan starts at the
+ delimiter itself; otherwise return the index of the character that
+ follows it. */
+static inline size_t
+skip_delim_internal (struct wordsplit *wsp, int return_delims)
{
- size_t start = wsp->ws_endp;
- if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS)
- {
- if ((wsp->ws_flags & WRDSF_RETURN_DELIMS) &&
- ISDELIM (wsp, wsp->ws_input[start]))
- {
- int delim = wsp->ws_input[start];
- do
- start++;
- while (start < wsp->ws_len && delim == wsp->ws_input[start]);
- }
- else
- {
- do
- start++;
- while (start < wsp->ws_len && ISDELIM (wsp, wsp->ws_input[start]));
- }
- start--;
- }
+ return return_delims ? wsp->ws_endp : wsp->ws_endp + 1;
+}
- if (!(wsp->ws_flags & WRDSF_RETURN_DELIMS))
- start++;
+/* Return the offset at which to resume scanning.  Delimiters are kept
+ for the next scan when WSP_RETURN_DELIMS holds, i.e. when either
+ WRDSF_RETURN_DELIMS or WRDSO_MAXWORDS is set. */
+static inline size_t
+skip_delim (struct wordsplit *wsp)
+{
+ return skip_delim_internal (wsp, WSP_RETURN_DELIMS (wsp));
+}
- return start;
+/* Same as skip_delim, but only the WRDSF_RETURN_DELIMS flag is
+ consulted; WRDSO_MAXWORDS has no effect here. */
+static inline size_t
+skip_delim_real (struct wordsplit *wsp)
+{
+ return skip_delim_internal (wsp, wsp->ws_flags & WRDSF_RETURN_DELIMS);
}
#define _WRDS_EOF 0
@@ -1053,7 +1922,7 @@ skip_delim (struct wordsplit *wsp)
#define _WRDS_ERR 2
static int
-scan_qstring (struct wordsplit *wsp, size_t start, size_t * end)
+scan_qstring (struct wordsplit *wsp, size_t start, size_t *end)
{
size_t j;
const char *command = wsp->ws_input;
@@ -1075,23 +1944,22 @@ scan_qstring (struct wordsplit *wsp, size_t start, size_t * end)
else
{
wsp->ws_endp = start;
- wsp->ws_errno = WRDSE_QUOTE;
- if (wsp->ws_flags & WRDSF_SHOWERR)
- wordsplit_perror (wsp);
+ _wsplt_seterr (wsp, WRDSE_QUOTE);
return _WRDS_ERR;
}
return 0;
}
static int
-scan_word (struct wordsplit *wsp, size_t start)
+scan_word (struct wordsplit *wsp, size_t start, int consume_all)
{
size_t len = wsp->ws_len;
const char *command = wsp->ws_input;
const char *comment = wsp->ws_comment;
int join = 0;
int flags = 0;
-
+ struct wordsplit_node *np = wsp->ws_tail;
+
size_t i = start;
if (i >= len)
@@ -1108,7 +1976,7 @@ scan_word (struct wordsplit *wsp, size_t start)
flags = _WSNF_SEXP;
i = skip_sed_expr (command, i, len);
}
- else if (!ISDELIM (wsp, command[i]))
+ else if (consume_all || !ISDELIM (wsp, command[i]))
{
while (i < len)
{
@@ -1147,15 +2015,28 @@ scan_word (struct wordsplit *wsp, size_t start)
}
}
- if (ISDELIM (wsp, command[i]))
+ if (command[i] == '$')
+ {
+ if (!(wsp->ws_flags & WRDSF_NOVAR)
+ && command[i+1] == '{'
+ && find_closing_paren (command, i + 2, len, &i, "{}") == 0)
+ continue;
+ if (!(wsp->ws_flags & WRDSF_NOCMD)
+ && command[i+1] == '('
+ && find_closing_paren (command, i + 2, len, &i, "()") == 0)
+ continue;
+ }
+
+ if (!consume_all && ISDELIM (wsp, command[i]))
break;
else
i++;
}
}
- else if (wsp->ws_flags & WRDSF_RETURN_DELIMS)
+ else if (WSP_RETURN_DELIMS (wsp))
{
i++;
+ flags |= _WSNF_DELIM;
}
else if (!(wsp->ws_flags & WRDSF_SQUEEZE_DELIMS))
flags |= _WSNF_EMPTYOK;
@@ -1167,36 +2048,19 @@ scan_word (struct wordsplit *wsp, size_t start)
wsp->ws_endp = i;
if (wsp->ws_flags & WRDSF_INCREMENTAL)
return _WRDS_EOF;
- return _WRDS_OK;
-}
-
-static char quote_transtab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";
-
-int
-wordsplit_c_unquote_char (int c)
-{
- char *p;
-
- for (p = quote_transtab; *p; p += 2)
+
+ if (consume_all)
{
- if (*p == c)
- return p[1];
- }
- return c;
-}
-
-int
-wordsplit_c_quote_char (int c)
-{
- char *p;
-
- for (p = quote_transtab + sizeof (quote_transtab) - 2;
- p > quote_transtab; p -= 2)
- {
- if (*p == c)
- return p[-1];
+ if (!np)
+ np = wsp->ws_head;
+ while (np)
+ {
+ np->flags |= _WSNF_QUOTE;
+ np = np->next;
+ }
}
- return -1;
+
+ return _WRDS_OK;
}
#define to_num(c) \
@@ -1239,7 +2103,7 @@ wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
len += 3;
else
{
- if (wordsplit_c_quote_char (*str) != -1)
+ if (wordsplit_c_quote_char (*str))
len += 2;
else
len += 4;
@@ -1248,47 +2112,56 @@ wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
return len;
}
-void
-wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
- const char *escapable)
+/* TRANSTAB is a string made of character pairs.  Find the pair whose
+ first character equals C and return its second character.  Return 0
+ if there is no such pair.  Scanning stops at the terminating null or
+ at an incomplete trailing pair. */
+static int
+wsplt_unquote_char (const char *transtab, int c)
{
- int i;
-
- for (i = 0; i < n;)
+ while (*transtab && transtab[1])
{
- if (src[i] == '\\' && i < n && strchr (escapable, src[i + 1]))
- i++;
- *dst++ = src[i++];
+ if (*transtab++ == c)
+ return *transtab;
+ ++transtab;
}
- *dst = 0;
+ return 0;
}
-void
-wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n)
+/* Reverse lookup in TRANSTAB, a string made of character pairs: find
+ the pair whose second character equals C and return its first
+ character.  Return 0 if there is no such pair. */
+static int
+wsplt_quote_char (const char *transtab, int c)
{
- int i;
-
- for (i = 0; i < n;)
+ for (; *transtab && transtab[1]; transtab += 2)
{
- if (src[i] == '\\')
- i++;
- *dst++ = src[i++];
+ if (transtab[1] == c)
+ return *transtab;
}
- *dst = 0;
+ return 0;
+}
+
+/* Look up C among the first characters of the pairs stored in
+ wordsplit_c_escape_tab and return the paired character, or 0 if C
+ is not listed there. */
+int
+wordsplit_c_unquote_char (int c)
+{
+ return wsplt_unquote_char (wordsplit_c_escape_tab, c);
+}
+
+/* Look up C among the second characters of the pairs stored in
+ wordsplit_c_escape_tab and return the paired first character, or 0
+ if C is not listed there.  Callers test the result for non-zero. */
+int
+wordsplit_c_quote_char (int c)
+{
+ return wsplt_quote_char (wordsplit_c_escape_tab, c);
}
void
-wordsplit_c_unquote_copy (char *dst, const char *src, size_t n)
+wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
+ char *dst, const char *src, size_t n)
{
int i = 0;
int c;
+ inquote = !!inquote;
while (i < n)
{
if (src[i] == '\\')
{
++i;
- if (src[i] == 'x' || src[i] == 'X')
+ if (WRDSO_ESC_TEST (ws, inquote, WRDSO_XESC)
+ && (src[i] == 'x' || src[i] == 'X'))
{
if (n - i < 2)
{
@@ -1311,7 +2184,8 @@ wordsplit_c_unquote_copy (char *dst, const char *src, size_t n)
}
}
}
- else if ((unsigned char) src[i] < 128 && ISDIGIT (src[i]))
+ else if (WRDSO_ESC_TEST (ws, inquote, WRDSO_OESC)
+ && (unsigned char) src[i] < 128 && ISDIGIT (src[i]))
{
if (n - i < 1)
{
@@ -1333,8 +2207,17 @@ wordsplit_c_unquote_copy (char *dst, const char *src, size_t n)
}
}
}
+ else if ((c = wsplt_unquote_char (ws->ws_escape[inquote], src[i])))
+ {
+ *dst++ = c;
+ ++i;
+ }
else
- *dst++ = wordsplit_c_unquote_char (src[i++]);
+ {
+ if (WRDSO_ESC_TEST (ws, inquote, WRDSO_BSKEEP))
+ *dst++ = '\\';
+ *dst++ = src[i++];
+ }
}
else
*dst++ = src[i++];
@@ -1368,7 +2251,7 @@ wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
{
int c = wordsplit_c_quote_char (*src);
*dst++ = '\\';
- if (c != -1)
+ if (c)
*dst++ = c;
else
{
@@ -1381,20 +2264,82 @@ wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
}
}
+
+/* This structure describes a single expansion phase. */
+struct exptab
+{
+ char const *descr; /* Textual description (for debugging); NULL ends the table */
+ int flag; /* WRDSF_ bit that controls this phase */
+ int opt; /* Entry-specific options (see EXPOPT_ flags below) */
+ int (*expansion) (struct wordsplit *wsp); /* Expansion function; may be NULL.
+ A non-zero return stops processing. */
+};
+
+/* The following options control expansions. They are bit values to be
+ combined in the opt member of struct exptab. */
+/* Normally the exptab entry is run if its flag bit is set in struct
+ wordsplit. The EXPOPT_NEG option negates this test so that expansion
+ is performed if its associated flag bit is not set in struct wordsplit. */
+#define EXPOPT_NEG 0x01
+/* All bits in flag must be set in order for the entry to match.
+ NOTE(review): this name is spelled EXPORT_, unlike the EXPOPT_ prefix
+ of its siblings; it looks like a typo, but renaming it requires
+ updating every use. */
+#define EXPORT_ALLOF 0x02
+/* Coalesce the input list before running the expansion. */
+#define EXPOPT_COALESCE 0x04
+
+/* Table of expansion phases. wordsplit_process_list walks it in the
+ order given, stopping at the entry whose descr is NULL, and runs each
+ entry accepted by exptab_matches. Entries with a zero flag and
+ EXPOPT_NEG are accepted unconditionally. An entry with a NULL
+ expansion function and EXPOPT_COALESCE only coalesces the list. */
+static struct exptab exptab[] = {
+ { N_("WS trimming"), WRDSF_WS, 0,
+ wordsplit_trimws },
+ { N_("command substitution"), WRDSF_NOCMD, EXPOPT_NEG|EXPOPT_COALESCE,
+ wordsplit_cmdexp },
+ { N_("coalesce list"), 0, EXPOPT_NEG|EXPOPT_COALESCE,
+ NULL },
+ { N_("tilde expansion"), WRDSF_PATHEXPAND, 0,
+ wordsplit_tildexpand },
+ { N_("variable expansion"), WRDSF_NOVAR, EXPOPT_NEG,
+ wordsplit_varexp },
+ { N_("quote removal"), 0, EXPOPT_NEG,
+ wsnode_quoteremoval },
+ { N_("coalesce list"), 0, EXPOPT_NEG|EXPOPT_COALESCE,
+ NULL },
+ { N_("path expansion"), WRDSF_PATHEXPAND, 0,
+ wordsplit_pathexpand },
+ { NULL }
+};
+
+/* Decide whether the expansion phase P applies to WSP.
+
+   The phase's flag bits are tested against wsp->ws_flags: any common
+   bit counts as a hit, unless EXPORT_ALLOF is set in P->opt, in which
+   case all of them must be present.  EXPOPT_NEG inverts the outcome.
+   Returns non-zero if the phase is to be run. */
+static inline int
+exptab_matches(struct exptab *p, struct wordsplit *wsp)
+{
+  int hit = wsp->ws_flags & p->flag;
+
+  if (p->opt & EXPORT_ALLOF)
+    hit = (hit == p->flag);
+
+  return (p->opt & EXPOPT_NEG) ? !hit : hit;
+}
+
static int
wordsplit_process_list (struct wordsplit *wsp, size_t start)
{
- if (wsp->ws_flags & WRDSF_NOSPLIT)
- {
- /* Treat entire input as a quoted argument */
- if (wordsplit_add_segm (wsp, start, wsp->ws_len, _WSNF_QUOTE))
+ struct exptab *p;
+
+ if (wsp->ws_flags & WRDSF_SHOWDBG)
+ wsp->ws_debug (_("(%02d) Input:%.*s;"),
+ wsp->ws_lvl, (int) wsp->ws_len, wsp->ws_input);
+
+ if ((wsp->ws_flags & WRDSF_NOSPLIT)
+ || ((wsp->ws_options & WRDSO_MAXWORDS)
+ && wsp->ws_wordi + 1 == wsp->ws_maxwords))
+ {
+ /* Treat entire input as a single word */
+ if (scan_word (wsp, start, 1) == _WRDS_ERR)
return wsp->ws_errno;
}
else
{
int rc;
- while ((rc = scan_word (wsp, start)) == _WRDS_OK)
+ while ((rc = scan_word (wsp, start, 0)) == _WRDS_OK)
start = skip_delim (wsp);
/* Make sure tail element is not joinable */
if (wsp->ws_tail)
@@ -1405,132 +2350,88 @@ wordsplit_process_list (struct wordsplit *wsp, size_t start)
if (wsp->ws_flags & WRDSF_SHOWDBG)
{
- wsp->ws_debug ("Initial list:");
+ wsp->ws_debug ("(%02d) %s", wsp->ws_lvl, _("Initial list:"));
wordsplit_dump_nodes (wsp);
}
- if (wsp->ws_flags & WRDSF_WS)
- {
- /* Trim leading and trailing whitespace */
- wordsplit_trimws (wsp);
- if (wsp->ws_flags & WRDSF_SHOWDBG)
- {
- wsp->ws_debug ("After WS trimming:");
- wordsplit_dump_nodes (wsp);
- }
- }
-
- /* Expand variables (FIXME: & commands) */
- if (!(wsp->ws_flags & WRDSF_NOVAR))
- {
- if (wordsplit_varexp (wsp))
- {
- wordsplit_free_nodes (wsp);
- return wsp->ws_errno;
- }
- if (wsp->ws_flags & WRDSF_SHOWDBG)
- {
- wsp->ws_debug ("Expanded list:");
- wordsplit_dump_nodes (wsp);
- }
- }
-
- do
+ for (p = exptab; p->descr; p++)
{
- if (wsnode_quoteremoval (wsp))
- break;
- if (wsp->ws_flags & WRDSF_SHOWDBG)
- {
- wsp->ws_debug ("After quote removal:");
- wordsplit_dump_nodes (wsp);
- }
-
- if (wsnode_coalesce (wsp))
- break;
-
- if (wsp->ws_flags & WRDSF_SHOWDBG)
+ if (exptab_matches(p, wsp))
{
- wsp->ws_debug ("Coalesced list:");
- wordsplit_dump_nodes (wsp);
+ if (p->opt & EXPOPT_COALESCE)
+ {
+ if (wsnode_coalesce (wsp))
+ break;
+ if (wsp->ws_flags & WRDSF_SHOWDBG)
+ {
+ wsp->ws_debug ("(%02d) %s", wsp->ws_lvl,
+ _("Coalesced list:"));
+ wordsplit_dump_nodes (wsp);
+ }
+ }
+ if (p->expansion)
+ {
+ if (p->expansion (wsp))
+ break;
+ if (wsp->ws_flags & WRDSF_SHOWDBG)
+ {
+ wsp->ws_debug ("(%02d) %s", wsp->ws_lvl, _(p->descr));
+ wordsplit_dump_nodes (wsp);
+ }
+ }
}
}
- while (0);
return wsp->ws_errno;
}
-int
-wordsplit_len (const char *command, size_t length, struct wordsplit *wsp,
- int flags)
+/* Common entry point of the splitter.
+
+ If COMMAND is not NULL, WSP is initialized by wordsplit_init for the
+ LENGTH bytes at COMMAND using FLAGS, LVL is stored in wsp->ws_lvl, and
+ the input is processed from offset 0.
+
+ If COMMAND is NULL, a previous incremental run is continued. This
+ requires WRDSF_INCREMENTAL in FLAGS; otherwise WRDSE_USAGE is raised.
+ If nodes are still pending (wsp->ws_head is set), they are handed to
+ wordsplit_finish right away. Otherwise scanning resumes at the offset
+ computed by skip_delim_real, or WRDSE_NOINPUT is raised if ws_endp has
+ reached the end of input.
+
+ Returns the error code of the failing step, or the result of
+ wordsplit_finish. */
+static int
+wordsplit_run (const char *command, size_t length, struct wordsplit *wsp,
+ int flags, int lvl)
{
int rc;
size_t start;
- const char *cmdptr;
- size_t cmdlen;
if (!command)
{
if (!(flags & WRDSF_INCREMENTAL))
- return EINVAL;
+ return _wsplt_seterr (wsp, WRDSE_USAGE);
+
+ if (wsp->ws_head)
+ return wordsplit_finish (wsp);
- start = skip_delim (wsp);
+ start = skip_delim_real (wsp);
if (wsp->ws_endp == wsp->ws_len)
- {
- wsp->ws_errno = WRDSE_NOINPUT;
- if (wsp->ws_flags & WRDSF_SHOWERR)
- wordsplit_perror (wsp);
- return wsp->ws_errno;
- }
+ return _wsplt_seterr (wsp, WRDSE_NOINPUT);
- cmdptr = wsp->ws_input + wsp->ws_endp;
- cmdlen = wsp->ws_len - wsp->ws_endp;
wsp->ws_flags |= WRDSF_REUSE;
wordsplit_init0 (wsp);
}
else
{
- cmdptr = command;
- cmdlen = length;
start = 0;
- rc = wordsplit_init (wsp, cmdptr, cmdlen, flags);
+ rc = wordsplit_init (wsp, command, length, flags);
if (rc)
return rc;
+ wsp->ws_lvl = lvl;
}
- if (wsp->ws_flags & WRDSF_SHOWDBG)
- wsp->ws_debug ("Input:%.*s;", (int) cmdlen, cmdptr);
-
rc = wordsplit_process_list (wsp, start);
- if (rc == 0 && (flags & WRDSF_INCREMENTAL))
- {
- while (!wsp->ws_head && wsp->ws_endp < wsp->ws_len)
- {
- start = skip_delim (wsp);
- if (wsp->ws_flags & WRDSF_SHOWDBG)
- {
- cmdptr = wsp->ws_input + wsp->ws_endp;
- cmdlen = wsp->ws_len - wsp->ws_endp;
- wsp->ws_debug ("Restart:%.*s;", (int) cmdlen, cmdptr);
- }
- rc = wordsplit_process_list (wsp, start);
- if (rc)
- break;
- }
- }
if (rc)
- {
- wordsplit_free_nodes (wsp);
- return rc;
- }
- wordsplit_finish (wsp);
- wordsplit_free_nodes (wsp);
- return wsp->ws_errno;
+ return rc;
+ return wordsplit_finish (wsp);
+}
+
+/* Split the LENGTH bytes at COMMAND into words according to FLAGS,
+ storing the state in WSP. This is wordsplit_run invoked with a
+ nesting level of 0; its result is returned unchanged. */
+int
+wordsplit_len (const char *command, size_t length, struct wordsplit *wsp,
+ int flags)
+{
+ return wordsplit_run (command, length, wsp, flags, 0);
}
+/* Split the null-terminated string COMMAND. The length is taken with
+ strlen; a NULL COMMAND is passed on to wordsplit_len with length 0. */
int
wordsplit (const char *command, struct wordsplit *ws, int flags)
{
- return wordsplit_len (command, command ? strlen (command) : 0, ws,
- flags);
+ return wordsplit_len (command, command ? strlen (command) : 0, ws, flags);
}
void
@@ -1551,66 +2452,68 @@ wordsplit_free_words (struct wordsplit *ws)
}
+/* Release the environment buffer of WS.
+
+   Nothing is done if WRDSF_NOCMD is set in ws_flags or if there is no
+   buffer.  Otherwise every entry up to the terminating NULL is freed,
+   then the array itself, and ws_envbuf, ws_envsiz and ws_envidx are
+   reset. */
void
+wordsplit_free_envbuf (struct wordsplit *ws)
+{
+  size_t n;
+
+  if ((ws->ws_flags & WRDSF_NOCMD) || !ws->ws_envbuf)
+    return;
+
+  n = 0;
+  while (ws->ws_envbuf[n])
+    free (ws->ws_envbuf[n++]);
+
+  free (ws->ws_envbuf);
+  ws->ws_envbuf = NULL;
+  ws->ws_envsiz = 0;
+  ws->ws_envidx = 0;
+}
+
+/* Reset the error state of WS to WRDSE_OK.  The user error message is
+   freed only if the current error is WRDSE_USERERR; the ws_usererr
+   pointer is cleared in either case. */
+void
+wordsplit_clearerr (struct wordsplit *ws)
+{
+  if (WRDSE_USERERR == ws->ws_errno)
+    {
+      free (ws->ws_usererr);
+    }
+
+  ws->ws_errno = WRDSE_OK;
+  ws->ws_usererr = NULL;
+}
+
+/* Release the resources held by WS: the node list, the words, the word
+ vector itself (ws_wordv is reset to NULL) and, via
+ wordsplit_free_envbuf, the environment buffer. */
+void
wordsplit_free (struct wordsplit *ws)
{
+ wordsplit_free_nodes (ws);
wordsplit_free_words (ws);
free (ws->ws_wordv);
ws->ws_wordv = NULL;
+ wordsplit_free_envbuf (ws);
}
-void
-wordsplit_perror (struct wordsplit *wsp)
+/* Detach the word array from WS and hand it over to the caller.
+
+ The array is resized with realloc to ws_wordc + 1 entries (the extra
+ slot is presumably the terminating NULL -- verify against the code
+ that fills ws_wordv), stored in *WORDV, and the word count is stored
+ in *WORDC. Afterwards ws_wordv, ws_wordc and ws_wordn are reset, so
+ WS no longer refers to the array.
+
+ Returns 0 on success, or -1 if realloc fails, in which case WS and
+ the output arguments are left untouched. */
+int
+wordsplit_get_words (struct wordsplit *ws, size_t *wordc, char ***wordv)
{
- switch (wsp->ws_errno)
- {
- case WRDSE_EOF:
- wsp->ws_error (_("no error"));
- break;
+ char **p = realloc (ws->ws_wordv,
+ (ws->ws_wordc + 1) * sizeof (ws->ws_wordv[0]));
+ if (!p)
+ return -1;
+ *wordv = p;
+ *wordc = ws->ws_wordc;
- case WRDSE_QUOTE:
- wsp->ws_error (_("missing closing %c (start near #%lu)"),
- wsp->ws_input[wsp->ws_endp],
- (unsigned long) wsp->ws_endp);
- break;
-
- case WRDSE_NOSPACE:
- wsp->ws_error (_("memory exhausted"));
- break;
-
- case WRDSE_NOSUPP:
- wsp->ws_error (_("command substitution is not yet supported"));
- break;
-
- case WRDSE_USAGE:
- wsp->ws_error (_("invalid wordsplit usage"));
- break;
-
- case WRDSE_CBRACE:
- wsp->ws_error (_("unbalanced curly brace"));
- break;
-
- case WRDSE_UNDEF:
- wsp->ws_error (_("undefined variable"));
- break;
-
- case WRDSE_NOINPUT:
- wsp->ws_error (_("input exhausted"));
- break;
+ ws->ws_wordv = NULL;
+ ws->ws_wordc = 0;
+ ws->ws_wordn = 0;
- default:
- wsp->ws_error (_("unknown error"));
- }
+ return 0;
}
+/* Error messages, indexed by ws_errno in wordsplit_strerror. The order
+ of the entries presumably has to follow the numbering of the WRDSE_
+ codes -- verify against their definitions when adding or removing an
+ entry. _wordsplit_nerrs is the number of entries. */
const char *_wordsplit_errstr[] = {
N_("no error"),
N_("missing closing quote"),
N_("memory exhausted"),
- N_("command substitution is not yet supported"),
N_("invalid wordsplit usage"),
N_("unbalanced curly brace"),
N_("undefined variable"),
- N_("input exhausted")
+ N_("input exhausted"),
+ N_("unbalanced parenthesis"),
+ N_("globbing error")
};
int _wordsplit_nerrs =
sizeof (_wordsplit_errstr) / sizeof (_wordsplit_errstr[0]);
@@ -1618,7 +2521,26 @@ int _wordsplit_nerrs =
+/* Return the message describing the current error of WS: the stored
+ user message for WRDSE_USERERR, the matching _wordsplit_errstr entry
+ for codes below _wordsplit_nerrs, and "unknown error" otherwise.
+ NOTE(review): only the upper bound of ws_errno is checked; a negative
+ value would index before the table -- confirm it cannot occur. */
const char *
wordsplit_strerror (struct wordsplit *ws)
{
+ if (ws->ws_errno == WRDSE_USERERR)
+ return ws->ws_usererr;
if (ws->ws_errno < _wordsplit_nerrs)
return _wordsplit_errstr[ws->ws_errno];
return N_("unknown error");
}
+
+/* Report the current error of WSP through its ws_error callback.
+
+   WRDSE_QUOTE gets a dedicated message naming the character found at
+   ws_endp and that offset.  Every other code is reported with the text
+   returned by wordsplit_strerror. */
+void
+wordsplit_perror (struct wordsplit *wsp)
+{
+  if (wsp->ws_errno == WRDSE_QUOTE)
+    {
+      wsp->ws_error (_("missing closing %c (start near #%lu)"),
+                     wsp->ws_input[wsp->ws_endp],
+                     (unsigned long) wsp->ws_endp);
+      return;
+    }
+
+  wsp->ws_error ("%s", wordsplit_strerror (wsp));
+}
+

Return to:

Send suggestions and report system problems to the System administrator.