aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sergey Poznyakoff <gray@gnu.org.ua> 2010-12-29 09:44:39 +0200
committer Sergey Poznyakoff <gray@gnu.org.ua> 2010-12-29 09:44:39 +0200
commit a66d9bb29b7a8bb357f7a085e152e10ed7a1d2ad (patch)
tree 53d2950acd081c73f376941d3de96045f8d9235d
parent 6b6a4fba1a69ccf0d2d22a238d7f8373c84526a3 (diff)
download smap-a66d9bb29b7a8bb357f7a085e152e10ed7a1d2ad.tar.gz
smap-a66d9bb29b7a8bb357f7a085e152e10ed7a1d2ad.tar.bz2
wordsplit: sync with mailutils
-rw-r--r-- include/smap/wordsplit.h 24
-rw-r--r-- lib/wordsplit.c 398
2 files changed, 272 insertions, 150 deletions
diff --git a/include/smap/wordsplit.h b/include/smap/wordsplit.h
index 204a9e0..3c4d8b1 100644
--- a/include/smap/wordsplit.h
+++ b/include/smap/wordsplit.h
@@ -19,8 +19,7 @@
#include <stddef.h>
-struct wordsplit
-{
+struct wordsplit {
size_t ws_wordc;
char **ws_wordv;
size_t ws_offs;
@@ -35,7 +34,8 @@ struct wordsplit
__attribute__ ((__format__ (__printf__, 1, 2)));
const char **ws_env;
- char *(*ws_getvar)(const char *, size_t);
+ const char *(*ws_getvar) (const char *, size_t, void *);
+ void *ws_closure;
const char *ws_input;
size_t ws_len;
@@ -62,11 +62,11 @@ struct wordsplit
is expanded. */
#define WRDSF_UNDEF 0x0000020
-/* Don't do variable expansion. Reserved for future use. */
+/* Don't do variable expansion. */
#define WRDSF_NOVAR 0x0000040
/* Abort on ENOMEM error */
#define WRDSF_ENOMEMABRT 0x0000080
-/* Treat whitespace as delimiters */
+/* Trim off any leading and trailing whitespace */
#define WRDSF_WS 0x0000100
/* Handle quotes and escape directives */
#define WRDSF_QUOTE 0x0000200
@@ -103,9 +103,15 @@ struct wordsplit
/* Handle C escapes */
#define WRDSF_CESCAPES 0x1000000
+/* ws_closure is set */
+#define WRDSF_CLOSURE 0x2000000
+/* ws_env is a Key/Value environment, i.e. the value of a variable is
+ stored in the element that follows its name. */
+#define WRDSF_ENV_KV 0x4000000
+
#define WRDSF_DEFFLAGS \
(WRDSF_NOVAR | WRDSF_NOCMD | \
- WRDSF_WS | WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_CESCAPES)
+ WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_CESCAPES)
#define WRDSE_EOF 0
#define WRDSE_QUOTE 1
@@ -116,11 +122,15 @@ struct wordsplit
#define WRDSE_UNDEF 6
int wordsplit(const char *s, struct wordsplit *p, int flags);
+int wordsplit_len(const char *s, size_t len,
+ struct wordsplit *p, int flags);
void wordsplit_free(struct wordsplit *p);
+void wordsplit_free_words(struct wordsplit *ws);
int wordsplit_c_unquote_char(int c);
int wordsplit_c_quote_char(int c);
-size_t wordsplit_c_quoted_length(const char *str, int quote_hex, int *quote);
+size_t wordsplit_c_quoted_length(const char *str, int quote_hex,
+ int *quote);
void wordsplit_sh_unquote_copy(char *dst, const char *src, size_t n);
void wordsplit_c_unquote_copy(char *dst, const char *src, size_t n);
void wordsplit_c_quote_copy(char *dst, const char *src, int quote_hex);
diff --git a/lib/wordsplit.c b/lib/wordsplit.c
index 9ad2669..b14155d 100644
--- a/lib/wordsplit.c
+++ b/lib/wordsplit.c
@@ -37,7 +37,8 @@
#define N_(msgid) msgid
#define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n')
-#define ISDELIM(c,delim) (strchr(delim,(c))!=NULL)
+#define ISDELIM(ws,c) \
+ (strchr ((ws)->ws_delim, (c)) != NULL)
#define ISPUNCT(c) (strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL)
#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z')
#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z')
@@ -97,14 +98,14 @@ wordsplit_init(struct wordsplit *wsp, const char *input, size_t len,
wsp->ws_error = _wsplt_error;
if (!(wsp->ws_flags & WRDSF_NOVAR)
- && !(wsp->ws_flags & (WRDSF_ENV|WRDSF_GETVAR))) {
+ && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR))) {
errno = EINVAL;
wsp->ws_errno = WRDSE_USAGE;
if (wsp->ws_flags & WRDSF_SHOWERR)
wordsplit_perror(wsp);
return wsp->ws_errno;
}
-
+
if (!(wsp->ws_flags & WRDSF_NOCMD)) {
errno = EINVAL;
wsp->ws_errno = WRDSE_NOSUPP;
@@ -117,11 +118,13 @@ wordsplit_init(struct wordsplit *wsp, const char *input, size_t len,
if (!(wsp->ws_flags & WRDSF_DEBUG)) {
if (wsp->ws_flags & WRDSF_ERROR)
wsp->ws_debug = wsp->ws_error;
+ else if (wsp->ws_flags & WRDSF_SHOWERR)
+ wsp->ws_debug = _wsplt_error;
else
wsp->ws_flags &= ~WRDSF_SHOWDBG;
}
}
-
+
wsp->ws_input = input;
wsp->ws_len = len;
@@ -135,16 +138,16 @@ wordsplit_init(struct wordsplit *wsp, const char *input, size_t len,
wsp->ws_comment = NULL;
if (wsp->ws_flags & WRDSF_REUSE) {
- wsp->ws_wordn = wsp->ws_wordc + 1;
if (!(wsp->ws_flags & WRDSF_APPEND))
- wsp->ws_wordc = 0;
+ wordsplit_free_words(wsp);
} else {
wsp->ws_wordv = NULL;
wsp->ws_wordc = 0;
wsp->ws_wordn = 0;
}
- if (wsp->ws_flags & WRDSF_DOOFFS)
- wsp->ws_wordn += wsp->ws_offs;
+
+ if (!(wsp->ws_flags & WRDSF_CLOSURE))
+ wsp->ws_closure = NULL;
wsp->ws_endp = 0;
wsp->ws_errno = 0;
@@ -161,11 +164,11 @@ alloc_space(struct wordsplit *wsp, size_t count)
if (wsp->ws_wordv == NULL) {
newalloc = offs + count > ALLOC_INIT ? count : ALLOC_INIT;
- ptr = calloc(newalloc, sizeof (ptr[0]));
+ ptr = calloc(newalloc, sizeof(ptr[0]));
} else if (wsp->ws_wordn < offs + wsp->ws_wordc + count) {
newalloc = offs + wsp->ws_wordc +
- count > ALLOC_INCR ? count : ALLOC_INCR;
- ptr = realloc(wsp->ws_wordv, newalloc * sizeof (ptr[0]));
+ count > ALLOC_INCR ? count : ALLOC_INCR;
+ ptr = realloc(wsp->ws_wordv, newalloc * sizeof(ptr[0]));
} else
return 0;
@@ -176,22 +179,28 @@ alloc_space(struct wordsplit *wsp, size_t count)
return _wsplt_nomem(wsp);
return 0;
}
-
-#define _WSNF_WORD 0x01 /* node contains word in v.word */
-#define _WSNF_QUOTE 0x02 /* text is quoted */
-#define _WSNF_NOEXPAND 0x04 /* text is not subject to expansion */
-#define _WSNF_JOIN 0x08 /* node must be joined with the next node */
-#define _WSNF_SEXP 0x10 /* is a sed expression */
+
+/* Node state flags */
+#define _WSNF_NULL 0x01 /* null node (a noop) */
+#define _WSNF_WORD 0x02 /* node contains word in v.word */
+#define _WSNF_QUOTE 0x04 /* text is quoted */
+#define _WSNF_NOEXPAND 0x08 /* text is not subject to expansion */
+#define _WSNF_JOIN 0x10 /* node must be joined with the next node */
+#define _WSNF_SEXP 0x20 /* is a sed expression */
+
+#define _WSNF_EMPTYOK 0x0100 /* special flag indicating that
+ wordsplit_add_segm must add the
+ segment even if it is empty */
struct wordsplit_node {
- struct wordsplit_node *prev; /* Previous element */
- struct wordsplit_node *next; /* Next element */
- int flags; /* Node flags */
+ struct wordsplit_node *prev; /* Previous element */
+ struct wordsplit_node *next; /* Next element */
+ int flags; /* Node flags */
union {
struct {
- size_t beg; /* Start of word in ws_input */
- size_t end; /* End of word in ws_input */
+ size_t beg; /* Start of word in ws_input */
+ size_t end; /* End of word in ws_input */
} segm;
char *word;
} v;
@@ -202,9 +211,11 @@ wsnode_flagstr(int flags)
{
static char retbuf[6];
char *p = retbuf;
-
+
if (flags & _WSNF_WORD)
*p++ = 'w';
+ else if (flags & _WSNF_NULL)
+ *p++ = 'n';
else
*p++ = '-';
if (flags & _WSNF_QUOTE)
@@ -230,7 +241,9 @@ wsnode_flagstr(int flags)
static const char *
wsnode_ptr(struct wordsplit *wsp, struct wordsplit_node *p)
{
- if (p->flags & _WSNF_WORD)
+ if (p->flags & _WSNF_NULL)
+ return "";
+ else if (p->flags & _WSNF_WORD)
return p->v.word;
else
return wsp->ws_input + p->v.segm.beg;
@@ -239,7 +252,9 @@ wsnode_ptr(struct wordsplit *wsp, struct wordsplit_node *p)
static size_t
wsnode_len(struct wordsplit_node *p)
{
- if (p->flags & _WSNF_WORD)
+ if (p->flags & _WSNF_NULL)
+ return 0;
+ else if (p->flags & _WSNF_WORD)
return strlen(p->v.word);
else
return p->v.segm.end - p->v.segm.beg;
@@ -281,9 +296,11 @@ wsnode_remove(struct wordsplit *wsp, struct wordsplit_node *node)
struct wordsplit_node *p;
p = node->prev;
- if (p)
+ if (p) {
p->next = node->next;
- else
+ if (!node->next)
+ p->flags &= ~_WSNF_JOIN;
+ } else
wsp->ws_head = node->next;
p = node->next;
@@ -331,12 +348,12 @@ wordsplit_add_segm(struct wordsplit *wsp, size_t beg, size_t end, int flg)
struct wordsplit_node *node;
int rc;
- if (end == beg)
+ if (end == beg && !(flg & _WSNF_EMPTYOK))
return 0;
rc = wsnode_new(wsp, &node);
if (rc)
return rc;
- node->flags = flg & ~_WSNF_WORD;
+ node->flags = flg & ~(_WSNF_WORD | _WSNF_EMPTYOK);
node->v.segm.beg = beg;
node->v.segm.end = end;
wsnode_append(wsp, node);
@@ -348,11 +365,12 @@ wordsplit_free_nodes(struct wordsplit *wsp)
{
struct wordsplit_node *p;
- for (p = wsp->ws_head; p; ) {
+ for (p = wsp->ws_head; p;) {
struct wordsplit_node *next = p->next;
wsnode_free(p);
p = next;
}
+ wsp->ws_head = wsp->ws_tail = NULL;
}
static void
@@ -360,15 +378,16 @@ wordsplit_dump_nodes(struct wordsplit *wsp)
{
struct wordsplit_node *p;
int n = 0;
-
+
for (p = wsp->ws_head, n = 0; p; p = p->next, n++) {
if (p->flags & _WSNF_WORD)
wsp->ws_debug("%4d: %p: %#04x (%s):%s;",
- n, p, p->flags, wsnode_flagstr(p->flags),
- p->v.word);
+ n, p, p->flags,
+ wsnode_flagstr(p->flags), p->v.word);
else
wsp->ws_debug("%4d: %p: %#04x (%s):%.*s;",
- n, p, p->flags, wsnode_flagstr(p->flags),
+ n, p, p->flags,
+ wsnode_flagstr(p->flags),
p->v.segm.end - p->v.segm.beg,
wsp->ws_input + p->v.segm.beg);
}
@@ -381,18 +400,18 @@ coalesce_segment(struct wordsplit *wsp, struct wordsplit_node *node)
size_t len = 0;
char *buf, *cur;
int stop;
-
+
for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next) {
len += wsnode_len(p);
}
len += wsnode_len(p);
end = p;
-
+
buf = malloc(len + 1);
if (!buf)
return _wsplt_nomem(wsp);
cur = buf;
-
+
p = node;
for (stop = 0; !stop;) {
struct wordsplit_node *next = p->next;
@@ -408,9 +427,9 @@ coalesce_segment(struct wordsplit *wsp, struct wordsplit_node *node)
}
p = next;
}
-
+
*cur = 0;
-
+
node->flags &= ~_WSNF_JOIN;
if (node->flags & _WSNF_WORD)
@@ -425,9 +444,9 @@ static int
wsnode_quoteremoval(struct wordsplit *wsp)
{
struct wordsplit_node *p;
- void (*uqfn)(char *, const char *, size_t) =
- (wsp->ws_flags & WRDSF_CESCAPES) ?
- wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy;
+ void (*uqfn) (char *, const char *, size_t) =
+ (wsp->ws_flags & WRDSF_CESCAPES) ?
+ wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy;
for (p = wsp->ws_head; p; p = p->next) {
const char *str = wsnode_ptr(wsp, p);
@@ -495,22 +514,21 @@ wordsplit_finish(struct wordsplit *wsp)
return _wsplt_nomem(wsp);
memcpy(newstr, str, slen);
newstr[slen] = 0;
-
+
wsp->ws_wordc++;
-
+
}
wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
return 0;
}
-
+
/* Variable expansion */
static int
node_split_prefix(struct wordsplit *wsp,
struct wordsplit_node **ptail,
struct wordsplit_node *node,
- size_t beg, size_t len,
- int flg)
+ size_t beg, size_t len, int flg)
{
struct wordsplit_node *newnode;
@@ -538,12 +556,12 @@ node_split_prefix(struct wordsplit *wsp,
}
static int
-find_closing_cbrace(const char *str, size_t i, size_t len, size_t *poff)
+find_closing_cbrace(const char *str, size_t i, size_t len, size_t * poff)
{
enum { st_init, st_squote, st_dquote } state = st_init;
size_t level = 1;
- for ( ; i < len; i++) {
+ for (; i < len; i++) {
switch (state) {
case st_init:
switch (str[i]) {
@@ -588,18 +606,34 @@ static const char *
wordsplit_find_env(struct wordsplit *wsp, const char *name, size_t len)
{
size_t i;
-
+
if (!(wsp->ws_flags & WRDSF_ENV))
return NULL;
- for (i = 0; wsp->ws_env[i]; i++) {
- size_t j;
- const char *var = wsp->ws_env[i];
-
- for (j = 0; j < len; j++)
- if (name[j] != var[j])
+
+ if (wsp->ws_flags & WRDSF_ENV_KV) {
+ /* A key-value pair environment */
+ for (i = 0; wsp->ws_env[i]; i++) {
+ size_t elen = strlen(wsp->ws_env[i]);
+ if (elen == len
+ && memcmp(wsp->ws_env[i], name, elen) == 0)
+ return wsp->ws_env[i + 1];
+ /* Skip the value. Break the loop if it is NULL. */
+ i++;
+ if (wsp->ws_env[i] == NULL)
break;
- if (j == len && var[j] == '=')
- return var + j + 1;
+ }
+ } else {
+ /* Usual (A=B) environment. */
+ for (i = 0; wsp->ws_env[i]; i++) {
+ size_t j;
+ const char *var = wsp->ws_env[i];
+
+ for (j = 0; j < len; j++)
+ if (name[j] != var[j])
+ break;
+ if (j == len && var[j] == '=')
+ return var + j + 1;
+ }
}
return NULL;
}
@@ -610,11 +644,11 @@ expvar(struct wordsplit *wsp, const char *str, size_t len,
{
size_t i = 0;
const char *defstr = NULL;
- char *value;
+ const char *value;
const char *vptr;
struct wordsplit_node *newnode;
const char *start = str - 1;
-
+
if (ISALPHA(str[0]) || str[0] == '_') {
for (i = 1; i < len; i++)
if (!(ISALNUM(str[i]) || str[i] == '_'))
@@ -628,7 +662,7 @@ expvar(struct wordsplit *wsp, const char *str, size_t len,
break;
if (str[i] == ':') {
size_t j;
-
+
defstr = str + i + 1;
if (find_closing_cbrace(str, i + 1, len, &j)) {
wsp->ws_errno = WRDSE_CBRACE;
@@ -647,7 +681,7 @@ expvar(struct wordsplit *wsp, const char *str, size_t len,
return 1;
wsnode_insert(wsp, newnode, *ptail, 0);
*ptail = newnode;
- newnode->flags = _WSNF_WORD|flg;
+ newnode->flags = _WSNF_WORD | flg;
newnode->v.word = malloc(3);
if (!newnode->v.word)
return _wsplt_nomem(wsp);
@@ -669,7 +703,7 @@ expvar(struct wordsplit *wsp, const char *str, size_t len,
if (!value)
return _wsplt_nomem(wsp);
} else if (wsp->ws_flags & WRDSF_GETVAR)
- value = wsp->ws_getvar(str, i);
+ value = wsp->ws_getvar(str, i, wsp->ws_closure);
else if (wsp->ws_flags & WRDSF_UNDEF) {
wsp->ws_errno = WRDSE_UNDEF;
if (wsp->ws_flags & WRDSF_SHOWERR)
@@ -677,31 +711,42 @@ expvar(struct wordsplit *wsp, const char *str, size_t len,
return 1;
} else {
if (wsp->ws_flags & WRDSF_WARNUNDEF)
- wsp->ws_error(_("warning: undefined variable `%.*s'"),
- i, str);
+ wsp->
+ ws_error(_
+ ("warning: undefined variable `%.*s'"),
+ i, str);
if (wsp->ws_flags & WRDSF_KEEPUNDEF)
value = NULL;
else
value = "";
}
- /* FIXME: handle defstr */
+ /* FIXME: handle defstr */
if (value) {
if (flg & _WSNF_QUOTE) {
if (wsnode_new(wsp, &newnode))
return 1;
wsnode_insert(wsp, newnode, *ptail, 0);
*ptail = newnode;
- newnode->flags = _WSNF_WORD|_WSNF_NOEXPAND|flg;
+ newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
newnode->v.word = strdup(value);
if (!newnode->v.word)
return _wsplt_nomem(wsp);
+ } else if (*value == 0) {
+ /* Empty string is a special case */
+ if (wsnode_new(wsp, &newnode))
+ return 1;
+ wsnode_insert(wsp, newnode, *ptail, 0);
+ *ptail = newnode;
+ newnode->flags = _WSNF_NULL;
} else {
struct wordsplit ws;
int i;
-
+
+ ws.ws_delim = wsp->ws_delim;
if (wordsplit(value, &ws,
WRDSF_NOVAR | WRDSF_NOCMD |
- WRDSF_WS | WRDSF_SQUEEZE_DELIMS)) {
+ WRDSF_DELIM | WRDSF_SQUEEZE_DELIMS))
+ {
wordsplit_free(&ws);
return 1;
}
@@ -711,10 +756,10 @@ expvar(struct wordsplit *wsp, const char *str, size_t len,
wsnode_insert(wsp, newnode, *ptail, 0);
*ptail = newnode;
newnode->flags = _WSNF_WORD |
- _WSNF_NOEXPAND |
- (i + 1 < ws.ws_wordc ?
- (flg & ~_WSNF_JOIN)
- : flg);
+ _WSNF_NOEXPAND |
+ (i + 1 <
+ ws.
+ ws_wordc ? (flg & ~_WSNF_JOIN) : flg);
newnode->v.word = strdup(ws.ws_wordv[i]);
if (!newnode->v.word)
return _wsplt_nomem(wsp);
@@ -723,17 +768,23 @@ expvar(struct wordsplit *wsp, const char *str, size_t len,
}
} else if (wsp->ws_flags & WRDSF_KEEPUNDEF) {
size_t size = *pend - start + 1;
-
+
if (wsnode_new(wsp, &newnode))
return 1;
wsnode_insert(wsp, newnode, *ptail, 0);
*ptail = newnode;
- newnode->flags = _WSNF_WORD|_WSNF_NOEXPAND|flg;
+ newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
newnode->v.word = malloc(size + 1);
if (!newnode->v.word)
return _wsplt_nomem(wsp);
memcpy(newnode->v.word, start, size);
newnode->v.word[size] = 0;
+ } else {
+ if (wsnode_new(wsp, &newnode))
+ return 1;
+ wsnode_insert(wsp, newnode, *ptail, 0);
+ *ptail = newnode;
+ newnode->flags = _WSNF_NULL;
}
return 0;
}
@@ -747,7 +798,7 @@ node_expand_vars(struct wordsplit *wsp, struct wordsplit_node *node)
const char *p;
size_t off = 0;
struct wordsplit_node *tail = node;
-
+
for (p = str; p < end; p++) {
if (*p == '\\') {
p++;
@@ -758,12 +809,13 @@ node_expand_vars(struct wordsplit *wsp, struct wordsplit_node *node)
if (tail != node)
tail->flags |= _WSNF_JOIN;
- if (node_split_prefix(wsp, &tail, node, off, n,
- _WSNF_JOIN))
+ if (node_split_prefix
+ (wsp, &tail, node, off, n, _WSNF_JOIN))
return 1;
p++;
if (expvar(wsp, p, slen - n, &tail, &p,
- node->flags & (_WSNF_JOIN|_WSNF_QUOTE)))
+ node->
+ flags & (_WSNF_JOIN | _WSNF_QUOTE)))
return 1;
off += p - str + 1;
str = p + 1;
@@ -788,16 +840,52 @@ wordsplit_varexp(struct wordsplit *wsp)
{
struct wordsplit_node *p;
- for (p = wsp->ws_head; p; ) {
+ for (p = wsp->ws_head; p;) {
struct wordsplit_node *next = p->next;
if (!(p->flags & _WSNF_NOEXPAND))
if (node_expand_vars(wsp, p))
return 1;
p = next;
}
+
+ /* Remove NULL nodes */
+ for (p = wsp->ws_head; p;) {
+ struct wordsplit_node *next = p->next;
+ if (p->flags & _WSNF_NULL) {
+ wsnode_remove(wsp, p);
+ wsnode_free(p);
+ }
+ p = next;
+ }
+
return 0;
}
+/* Strip off any leading and trailing whitespace. This function is called
+ right after the initial scanning, therefore it assumes that every
+ node in the list is a text reference node. */
+static void
+wordsplit_trimws(struct wordsplit *wsp)
+{
+ struct wordsplit_node *p;
+
+ for (p = wsp->ws_head; p; p = p->next) {
+ size_t n;
+
+ if (p->flags & _WSNF_QUOTE)
+ continue;
+
+ /* Skip leading whitespace: */
+ for (n = p->v.segm.beg;
+ n < p->v.segm.end && ISWS(wsp->ws_input[n]); n++);
+ p->v.segm.beg = n;
+ /* Trim trailing whitespace */
+ for (n = p->v.segm.end;
+ n > p->v.segm.beg && ISWS(wsp->ws_input[n - 1]); n--);
+ p->v.segm.end = n;
+ }
+}
+
static int
skip_sed_expr(const char *command, size_t i, size_t len)
{
@@ -808,13 +896,14 @@ skip_sed_expr(const char *command, size_t i, size_t len)
if (command[i] == ';')
i++;
- if (!(command[i] == 's' && i + 3 < len
- && ISPUNCT(command[i+1])))
+ if (!
+ (command[i] == 's' && i + 3 < len
+ && ISPUNCT(command[i + 1])))
break;
delim = command[++i];
state = 1;
- for (i++; i < len; i++) {
+ for (i++; i < len; i++) {
if (state == 3) {
if (command[i] == delim
|| !ISALNUM(command[i]))
@@ -824,7 +913,8 @@ skip_sed_expr(const char *command, size_t i, size_t len)
else if (command[i] == delim)
state++;
}
- } while (state == 3 && i < len && command[i] == ';');
+ }
+ while (state == 3 && i < len && command[i] == ';');
return i;
}
@@ -836,7 +926,7 @@ skip_delim(struct wordsplit *wsp)
do
start++;
while (start < wsp->ws_len
- && ISDELIM(wsp->ws_input[start], wsp->ws_delim));
+ && ISDELIM(wsp, wsp->ws_input[start]));
start--;
}
@@ -851,18 +941,18 @@ skip_delim(struct wordsplit *wsp)
#define _WRDS_ERR 2
static int
-scan_qstring(struct wordsplit *wsp, size_t start, size_t *end)
+scan_qstring(struct wordsplit *wsp, size_t start, size_t * end)
{
size_t j;
const char *command = wsp->ws_input;
size_t len = wsp->ws_len;
char q = command[start];
-
+
for (j = start + 1; j < len && command[j] != q; j++)
if (q == '"' && command[j] == '\\')
j++;
if (j < len && command[j] == q) {
- int flags = _WSNF_QUOTE;
+ int flags = _WSNF_QUOTE | _WSNF_EMPTYOK;
if (q == '\'')
flags |= _WSNF_NOEXPAND;
if (wordsplit_add_segm(wsp, start + 1, j, flags))
@@ -883,41 +973,30 @@ scan_word(struct wordsplit *wsp, size_t start)
{
size_t len = wsp->ws_len;
const char *command = wsp->ws_input;
- const char *delim = wsp->ws_delim;
const char *comment = wsp->ws_comment;
int join = 0;
int flags = 0;
-
+
size_t i = start;
if (i >= len) {
wsp->ws_errno = WRDSE_EOF;
return _WRDS_EOF;
}
-
- if ((wsp->ws_flags & WRDSF_WS)
- && !(wsp->ws_flags & WRDSF_RETURN_DELIMS)) {
- /* Skip initial whitespace */
- while (ISWS(command[i]))
- if (++i == len) {
- wsp->ws_errno = WRDSE_EOF;
- return _WRDS_EOF;
- }
- }
start = i;
if (wsp->ws_flags & WRDSF_SED_EXPR
- && command[i] == 's' && i + 3 < len && ISPUNCT(command[i+1])) {
+ && command[i] == 's' && i + 3 < len
+ && ISPUNCT(command[i + 1])) {
flags = _WSNF_SEXP;
i = skip_sed_expr(command, i, len);
- } else if (!ISDELIM(command[i], delim)) {
+ } else if (!ISDELIM(wsp, command[i])) {
while (i < len) {
if (comment && strchr(comment, command[i]) != NULL) {
size_t j;
- for (j = i + 1; j < len
- && command[j] != '\n'; j++)
- ;
+ for (j = i + 1;
+ j < len && command[j] != '\n'; j++);
if (wordsplit_add_segm(wsp, start, i, 0))
return _WRDS_ERR;
wsp->ws_endp = j;
@@ -925,19 +1004,20 @@ scan_word(struct wordsplit *wsp, size_t start)
}
if (wsp->ws_flags & WRDSF_QUOTE) {
- if (command[i] == '\\') {
+ if (command[i] == '\\') {
if (++i == len)
break;
i++;
continue;
}
-
- if (command[i] == '\'' || command[i] == '"') {
+
+ if (command[i] == '\''
+ || command[i] == '"') {
if (join && wsp->ws_tail)
- wsp->ws_tail->flags
- |= _WSNF_JOIN;
- if (wordsplit_add_segm(wsp, start,
- i, _WSNF_JOIN))
+ wsp->ws_tail->flags |=
+ _WSNF_JOIN;
+ if (wordsplit_add_segm
+ (wsp, start, i, _WSNF_JOIN))
return _WRDS_ERR;
if (scan_qstring(wsp, i, &i))
return _WRDS_ERR;
@@ -946,8 +1026,7 @@ scan_word(struct wordsplit *wsp, size_t start)
}
}
- if (((wsp->ws_flags & WRDSF_WS) && ISWS(command[i]))
- || ISDELIM(command[i], delim))
+ if (ISDELIM(wsp, command[i]))
break;
else
i++;
@@ -955,9 +1034,10 @@ scan_word(struct wordsplit *wsp, size_t start)
} else if (wsp->ws_flags & WRDSF_RETURN_DELIMS) {
do {
i++;
- } while (i < len && ISDELIM(command[i], delim));
- }
-
+ }
+ while (i < len && ISDELIM(wsp, command[i]));
+ } else if (!(wsp->ws_flags & WRDSF_SQUEEZE_DELIMS))
+ flags |= _WSNF_EMPTYOK;
if (join && i > start && wsp->ws_tail)
wsp->ws_tail->flags |= _WSNF_JOIN;
@@ -998,13 +1078,12 @@ wordsplit_c_quote_char(int c)
#define to_num(c) \
(ISDIGIT(c) ? c - '0' : (ISXDIGIT(c) ? toupper(c) - 'A' + 10 : 255 ))
-static int
-xtonum(int *pval, const char *src, int base, int cnt)
+static int xtonum(int *pval, const char *src, int base, int cnt)
{
int i, val;
for (i = 0, val = 0; i < cnt; i++, src++) {
- int n = *(unsigned char*)src;
+ int n = *(unsigned char *) src;
if (n > 127 || (n = to_num(n)) >= base)
break;
val = val * base + n;
@@ -1020,13 +1099,14 @@ wordsplit_c_quoted_length(const char *str, int quote_hex, int *quote)
*quote = 0;
for (; *str; str++) {
- if (*str == ' ') {
- len++;
+ if (strchr(" \"", *str))
*quote = 1;
- } else if (*str == '"') {
+
+ if (*str == ' ')
+ len++;
+ else if (*str == '"')
len += 2;
- *quote = 1;
- } else if (*str != '\t' && *str != '\\' && ISPRINT(*str))
+ else if (*str != '\t' && *str != '\\' && ISPRINT(*str))
len++;
else if (quote_hex)
len += 3;
@@ -1072,19 +1152,19 @@ wordsplit_c_unquote_copy(char *dst, const char *src, size_t n)
if (off == 0) {
*dst++ = '\\';
*dst++ = src[i++];
- }
- else {
+ } else {
*dst++ = c;
i += off + 1;
}
}
- } else if ((unsigned char)src[i] < 128
+ } else if ((unsigned char) src[i] < 128
&& ISDIGIT(src[i])) {
if (n - i < 1) {
*dst++ = '\\';
*dst++ = src[i++];
} else {
- int off = xtonum(&c, src+i, 8, 3);
+ int off =
+ xtonum(&c, src + i, 8, 3);
if (off == 0) {
*dst++ = '\\';
*dst++ = src[i++];
@@ -1094,7 +1174,8 @@ wordsplit_c_unquote_copy(char *dst, const char *src, size_t n)
}
}
} else
- *dst++ = wordsplit_c_unquote_char(src[i++]);
+ *dst++ =
+ wordsplit_c_unquote_char(src[i++]);
} else
*dst++ = src[i++];
}
@@ -1115,7 +1196,7 @@ wordsplit_c_quote_copy(char *dst, const char *src, int quote_hex)
if (quote_hex) {
snprintf(tmp, sizeof tmp, "%%%02X",
- *(unsigned char*)src);
+ *(unsigned char *) src);
memcpy(dst, tmp, 3);
dst += 3;
} else {
@@ -1125,7 +1206,7 @@ wordsplit_c_quote_copy(char *dst, const char *src, int quote_hex)
*dst++ = c;
else {
snprintf(tmp, sizeof tmp, "%03o",
- *(unsigned char*)src);
+ *(unsigned char *) src);
memcpy(dst, tmp, 3);
dst += 3;
}
@@ -1155,6 +1236,9 @@ wordsplit_len(const char *command, size_t len, struct wordsplit *wsp,
} else {
while ((rc = scan_word(wsp, start)) == _WRDS_OK)
start = skip_delim(wsp);
+ /* Make sure tail element is not joinable */
+ if (wsp->ws_tail)
+ wsp->ws_tail->flags &= ~_WSNF_JOIN;
}
if (wsp->ws_flags & WRDSF_SHOWDBG) {
@@ -1166,7 +1250,16 @@ wordsplit_len(const char *command, size_t len, struct wordsplit *wsp,
return wsp->ws_errno;
}
- /* FIXME: Expand variables & commands here */
+ if (wsp->ws_flags & WRDSF_WS) {
+ /* Trim leading and trailing whitespace */
+ wordsplit_trimws(wsp);
+ if (wsp->ws_flags & WRDSF_SHOWDBG) {
+ wsp->ws_debug("After WS trimming:");
+ wordsplit_dump_nodes(wsp);
+ }
+ }
+
+ /* Expand variables (FIXME: & commands) */
if (!(wsp->ws_flags & WRDSF_NOVAR)) {
if (wordsplit_varexp(wsp)) {
wordsplit_free_nodes(wsp);
@@ -1185,17 +1278,18 @@ wordsplit_len(const char *command, size_t len, struct wordsplit *wsp,
wsp->ws_debug("After quote removal:");
wordsplit_dump_nodes(wsp);
}
-
+
if (wsnode_coalesce(wsp))
break;
-
+
if (wsp->ws_flags & WRDSF_SHOWDBG) {
wsp->ws_debug("Coalesced list:");
wordsplit_dump_nodes(wsp);
}
wordsplit_finish(wsp);
- } while (0);
+ }
+ while (0);
wordsplit_free_nodes(wsp);
return wsp->ws_errno;
}
@@ -1203,12 +1297,28 @@ wordsplit_len(const char *command, size_t len, struct wordsplit *wsp,
int
wordsplit(const char *command, struct wordsplit *ws, int flags)
{
- return wordsplit_len(command, strlen (command), ws, flags);
+ return wordsplit_len(command, strlen(command), ws, flags);
+}
+
+void
+wordsplit_free_words(struct wordsplit *ws)
+{
+ size_t i;
+
+ for (i = 0; i < ws->ws_wordc; i++) {
+ char *p = ws->ws_wordv[ws->ws_offs + i];
+ if (p) {
+ free(p);
+ ws->ws_wordv[ws->ws_offs + i] = NULL;
+ }
+ }
+ ws->ws_wordc = 0;
}
void
wordsplit_free(struct wordsplit *ws)
{
+ wordsplit_free_words(ws);
free(ws->ws_wordv);
ws->ws_wordv = NULL;
}
@@ -1220,7 +1330,7 @@ wordsplit_perror(struct wordsplit *wsp)
case WRDSE_EOF:
wsp->ws_error(_("no error"));
break;
-
+
case WRDSE_QUOTE:
wsp->ws_error(_("missing closing %c (start near #%lu)"),
wsp->ws_input[wsp->ws_endp],
@@ -1230,9 +1340,11 @@ wordsplit_perror(struct wordsplit *wsp)
case WRDSE_NOSPACE:
wsp->ws_error(_("memory exhausted"));
break;
-
+
case WRDSE_NOSUPP:
- wsp->ws_error(_("command substitution is not yet supported"));
+ wsp->
+ ws_error(_
+ ("command substitution is not yet supported"));
case WRDSE_USAGE:
wsp->ws_error(_("invalid wordsplit usage"));
@@ -1245,7 +1357,7 @@ wordsplit_perror(struct wordsplit *wsp)
case WRDSE_UNDEF:
wsp->ws_error(_("undefined variable"));
break;
-
+
default:
wsp->ws_error(_("unknown error"));
}
@@ -1261,7 +1373,8 @@ const char *_wordsplit_errstr[] = {
N_("unbalanced curly brace"),
N_("undefined variable")
};
-int _wordsplit_nerrs = sizeof(_wordsplit_errstr)/sizeof(_wordsplit_errstr[0]);
+int _wordsplit_nerrs =
+ sizeof(_wordsplit_errstr) / sizeof(_wordsplit_errstr[0]);
const char *
wordsplit_strerror(struct wordsplit *ws)
@@ -1270,4 +1383,3 @@ wordsplit_strerror(struct wordsplit *ws)
return _wordsplit_errstr[ws->ws_errno];
return N_("unknown error");
}
-

Return to:

Send suggestions and report system problems to the System administrator.