diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2010-12-29 09:44:39 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2010-12-29 09:44:39 +0200 |
commit | a66d9bb29b7a8bb357f7a085e152e10ed7a1d2ad (patch) | |
tree | 53d2950acd081c73f376941d3de96045f8d9235d | |
parent | 6b6a4fba1a69ccf0d2d22a238d7f8373c84526a3 (diff) | |
download | smap-a66d9bb29b7a8bb357f7a085e152e10ed7a1d2ad.tar.gz smap-a66d9bb29b7a8bb357f7a085e152e10ed7a1d2ad.tar.bz2 |
wordsplit: sync with mailutils
-rw-r--r-- | include/smap/wordsplit.h | 24 | ||||
-rw-r--r-- | lib/wordsplit.c | 398 |
2 files changed, 272 insertions, 150 deletions
diff --git a/include/smap/wordsplit.h b/include/smap/wordsplit.h index 204a9e0..3c4d8b1 100644 --- a/include/smap/wordsplit.h +++ b/include/smap/wordsplit.h @@ -19,8 +19,7 @@ #include <stddef.h> -struct wordsplit -{ +struct wordsplit { size_t ws_wordc; char **ws_wordv; size_t ws_offs; @@ -35,7 +34,8 @@ struct wordsplit __attribute__ ((__format__ (__printf__, 1, 2))); const char **ws_env; - char *(*ws_getvar)(const char *, size_t); + const char *(*ws_getvar) (const char *, size_t, void *); + void *ws_closure; const char *ws_input; size_t ws_len; @@ -62,11 +62,11 @@ struct wordsplit is expanded. */ #define WRDSF_UNDEF 0x0000020 -/* Don't do variable expansion. Reserved for future use. */ +/* Don't do variable expansion. */ #define WRDSF_NOVAR 0x0000040 /* Abort on ENOMEM error */ #define WRDSF_ENOMEMABRT 0x0000080 -/* Treat whitespace as delimiters */ +/* Trim off any leading and trailind whitespace */ #define WRDSF_WS 0x0000100 /* Handle quotes and escape directives */ #define WRDSF_QUOTE 0x0000200 @@ -103,9 +103,15 @@ struct wordsplit /* Handle C escapes */ #define WRDSF_CESCAPES 0x1000000 +/* ws_closure is set */ +#define WRDSF_CLOSURE 0x2000000 +/* ws_env is a Key/Value environment, i.e. the value of a variable is + stored in the element that follows its name. */ +#define WRDSF_ENV_KV 0x4000000 + #define WRDSF_DEFFLAGS \ (WRDSF_NOVAR | WRDSF_NOCMD | \ - WRDSF_WS | WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_CESCAPES) + WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_CESCAPES) #define WRDSE_EOF 0 #define WRDSE_QUOTE 1 @@ -116,11 +122,15 @@ struct wordsplit #define WRDSE_UNDEF 6 int wordsplit(const char *s, struct wordsplit *p, int flags); +int wordsplit_len(const char *s, size_t len, + struct wordsplit *p, int flags); void wordsplit_free(struct wordsplit *p); +void wordsplit_free_words(struct wordsplit *ws); int wordsplit_c_unquote_char(int c); int wordsplit_c_quote_char(int c); -size_t wordsplit_c_quoted_length(const char *str, int quote_hex, int *quote); +size_t wordsplit_c_quoted_length(const char *str, int quote_hex, + int *quote); void wordsplit_sh_unquote_copy(char *dst, const char *src, size_t n); void wordsplit_c_unquote_copy(char *dst, const char *src, size_t n); void wordsplit_c_quote_copy(char *dst, const char *src, int quote_hex); diff --git a/lib/wordsplit.c b/lib/wordsplit.c index 9ad2669..b14155d 100644 --- a/lib/wordsplit.c +++ b/lib/wordsplit.c @@ -37,7 +37,8 @@ #define N_(msgid) msgid #define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n') -#define ISDELIM(c,delim) (strchr(delim,(c))!=NULL) +#define ISDELIM(ws,c) \ + (strchr ((ws)->ws_delim, (c)) != NULL) #define ISPUNCT(c) (strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL) #define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z') #define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z') @@ -97,14 +98,14 @@ wordsplit_init(struct wordsplit *wsp, const char *input, size_t len, wsp->ws_error = _wsplt_error; if (!(wsp->ws_flags & WRDSF_NOVAR) - && !(wsp->ws_flags & (WRDSF_ENV|WRDSF_GETVAR))) { + && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR))) { errno = EINVAL; wsp->ws_errno = WRDSE_USAGE; if (wsp->ws_flags & WRDSF_SHOWERR) wordsplit_perror(wsp); return wsp->ws_errno; } - + if (!(wsp->ws_flags & WRDSF_NOCMD)) { errno = EINVAL; wsp->ws_errno = WRDSE_NOSUPP; @@ -117,11 +118,13 @@ wordsplit_init(struct wordsplit *wsp, const char *input, size_t len, if (!(wsp->ws_flags & WRDSF_DEBUG)) { if (wsp->ws_flags & WRDSF_ERROR) wsp->ws_debug = wsp->ws_error; + else if (wsp->ws_flags & WRDSF_SHOWERR) + wsp->ws_debug = _wsplt_error; else wsp->ws_flags &= ~WRDSF_SHOWDBG; } } - + wsp->ws_input = input; wsp->ws_len = len; @@ -135,16 +138,16 @@ wordsplit_init(struct wordsplit *wsp, const char *input, size_t len, wsp->ws_comment = NULL; if (wsp->ws_flags & WRDSF_REUSE) { - wsp->ws_wordn = wsp->ws_wordc + 1; if (!(wsp->ws_flags & WRDSF_APPEND)) - wsp->ws_wordc = 0; + wordsplit_free_words(wsp); } else { wsp->ws_wordv = NULL; wsp->ws_wordc = 0; wsp->ws_wordn = 0; } - if (wsp->ws_flags & WRDSF_DOOFFS) - wsp->ws_wordn += wsp->ws_offs; + + if (!(wsp->ws_flags & WRDSF_CLOSURE)) + wsp->ws_closure = NULL; wsp->ws_endp = 0; wsp->ws_errno = 0; @@ -161,11 +164,11 @@ alloc_space(struct wordsplit *wsp, size_t count) if (wsp->ws_wordv == NULL) { newalloc = offs + count > ALLOC_INIT ? count : ALLOC_INIT; - ptr = calloc(newalloc, sizeof (ptr[0])); + ptr = calloc(newalloc, sizeof(ptr[0])); } else if (wsp->ws_wordn < offs + wsp->ws_wordc + count) { newalloc = offs + wsp->ws_wordc + - count > ALLOC_INCR ? count : ALLOC_INCR; - ptr = realloc(wsp->ws_wordv, newalloc * sizeof (ptr[0])); + count > ALLOC_INCR ? count : ALLOC_INCR; + ptr = realloc(wsp->ws_wordv, newalloc * sizeof(ptr[0])); } else return 0; @@ -176,22 +179,28 @@ alloc_space(struct wordsplit *wsp, size_t count) return _wsplt_nomem(wsp); return 0; } - -#define _WSNF_WORD 0x01 /* node contains word in v.word */ -#define _WSNF_QUOTE 0x02 /* text is quoted */ -#define _WSNF_NOEXPAND 0x04 /* text is not subject to expansion */ -#define _WSNF_JOIN 0x08 /* node must be joined with the next node */ -#define _WSNF_SEXP 0x10 /* is a sed expression */ + +/* Node state flags */ +#define _WSNF_NULL 0x01 /* null node (a noop) */ +#define _WSNF_WORD 0x02 /* node contains word in v.word */ +#define _WSNF_QUOTE 0x04 /* text is quoted */ +#define _WSNF_NOEXPAND 0x08 /* text is not subject to expansion */ +#define _WSNF_JOIN 0x10 /* node must be joined with the next node */ +#define _WSNF_SEXP 0x20 /* is a sed expression */ + +#define _WSNF_EMPTYOK 0x0100 /* special flag indicating that + wordsplit_add_segm must add the + segment even if it is empty */ struct wordsplit_node { - struct wordsplit_node *prev; /* Previous element */ - struct wordsplit_node *next; /* Next element */ - int flags; /* Node flags */ + struct wordsplit_node *prev; /* Previous element */ + struct wordsplit_node *next; /* Next element */ + int flags; /* Node flags */ union { struct { - size_t beg; /* Start of word in ws_input */ - size_t end; /* End of word in ws_input */ + size_t beg; /* Start of word in ws_input */ + size_t end; /* End of word in ws_input */ } segm; char *word; } v; @@ -202,9 +211,11 @@ wsnode_flagstr(int flags) { static char retbuf[6]; char *p = retbuf; - + if (flags & _WSNF_WORD) *p++ = 'w'; + else if (flags & _WSNF_NULL) + *p++ = 'n'; else *p++ = '-'; if (flags & _WSNF_QUOTE) @@ -230,7 +241,9 @@ wsnode_flagstr(int flags) static const char * wsnode_ptr(struct wordsplit *wsp, struct wordsplit_node *p) { - if (p->flags & _WSNF_WORD) + if (p->flags & _WSNF_NULL) + return ""; + else if (p->flags & _WSNF_WORD) return p->v.word; else return wsp->ws_input + p->v.segm.beg; @@ -239,7 +252,9 @@ wsnode_ptr(struct wordsplit *wsp, struct wordsplit_node *p) static size_t wsnode_len(struct wordsplit_node *p) { - if (p->flags & _WSNF_WORD) + if (p->flags & _WSNF_NULL) + return 0; + else if (p->flags & _WSNF_WORD) return strlen(p->v.word); else return p->v.segm.end - p->v.segm.beg; @@ -281,9 +296,11 @@ wsnode_remove(struct wordsplit *wsp, struct wordsplit_node *node) struct wordsplit_node *p; p = node->prev; - if (p) + if (p) { p->next = node->next; - else + if (!node->next) + p->flags &= ~_WSNF_JOIN; + } else wsp->ws_head = node->next; p = node->next; @@ -331,12 +348,12 @@ wordsplit_add_segm(struct wordsplit *wsp, size_t beg, size_t end, int flg) struct wordsplit_node *node; int rc; - if (end == beg) + if (end == beg && !(flg & _WSNF_EMPTYOK)) return 0; rc = wsnode_new(wsp, &node); if (rc) return rc; - node->flags = flg & ~_WSNF_WORD; + node->flags = flg & ~(_WSNF_WORD | _WSNF_EMPTYOK); node->v.segm.beg = beg; node->v.segm.end = end; wsnode_append(wsp, node); @@ -348,11 +365,12 @@ wordsplit_free_nodes(struct wordsplit *wsp) { struct wordsplit_node *p; - for (p = wsp->ws_head; p; ) { + for (p = wsp->ws_head; p;) { struct wordsplit_node *next = p->next; wsnode_free(p); p = next; } + wsp->ws_head = wsp->ws_tail = NULL; } static void @@ -360,15 +378,16 @@ wordsplit_dump_nodes(struct wordsplit *wsp) { struct wordsplit_node *p; int n = 0; - + for (p = wsp->ws_head, n = 0; p; p = p->next, n++) { if (p->flags & _WSNF_WORD) wsp->ws_debug("%4d: %p: %#04x (%s):%s;", - n, p, p->flags, wsnode_flagstr(p->flags), - p->v.word); + n, p, p->flags, + wsnode_flagstr(p->flags), p->v.word); else wsp->ws_debug("%4d: %p: %#04x (%s):%.*s;", - n, p, p->flags, wsnode_flagstr(p->flags), + n, p, p->flags, + wsnode_flagstr(p->flags), p->v.segm.end - p->v.segm.beg, wsp->ws_input + p->v.segm.beg); } @@ -381,18 +400,18 @@ coalesce_segment(struct wordsplit *wsp, struct wordsplit_node *node) size_t len = 0; char *buf, *cur; int stop; - + for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next) { len += wsnode_len(p); } len += wsnode_len(p); end = p; - + buf = malloc(len + 1); if (!buf) return _wsplt_nomem(wsp); cur = buf; - + p = node; for (stop = 0; !stop;) { struct wordsplit_node *next = p->next; @@ -408,9 +427,9 @@ coalesce_segment(struct wordsplit *wsp, struct wordsplit_node *node) } p = next; } - + *cur = 0; - + node->flags &= ~_WSNF_JOIN; if (node->flags & _WSNF_WORD) @@ -425,9 +444,9 @@ static int wsnode_quoteremoval(struct wordsplit *wsp) { struct wordsplit_node *p; - void (*uqfn)(char *, const char *, size_t) = - (wsp->ws_flags & WRDSF_CESCAPES) ? - wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy; + void (*uqfn) (char *, const char *, size_t) = + (wsp->ws_flags & WRDSF_CESCAPES) ? + wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy; for (p = wsp->ws_head; p; p = p->next) { const char *str = wsnode_ptr(wsp, p); @@ -495,22 +514,21 @@ wordsplit_finish(struct wordsplit *wsp) return _wsplt_nomem(wsp); memcpy(newstr, str, slen); newstr[slen] = 0; - + wsp->ws_wordc++; - + } wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL; return 0; } - + /* Variable expansion */ static int node_split_prefix(struct wordsplit *wsp, struct wordsplit_node **ptail, struct wordsplit_node *node, - size_t beg, size_t len, - int flg) + size_t beg, size_t len, int flg) { struct wordsplit_node *newnode; @@ -538,12 +556,12 @@ node_split_prefix(struct wordsplit *wsp, } static int -find_closing_cbrace(const char *str, size_t i, size_t len, size_t *poff) +find_closing_cbrace(const char *str, size_t i, size_t len, size_t * poff) { enum { st_init, st_squote, st_dquote } state = st_init; size_t level = 1; - for ( ; i < len; i++) { + for (; i < len; i++) { switch (state) { case st_init: switch (str[i]) { @@ -588,18 +606,34 @@ static const char * wordsplit_find_env(struct wordsplit *wsp, const char *name, size_t len) { size_t i; - + if (!(wsp->ws_flags & WRDSF_ENV)) return NULL; - for (i = 0; wsp->ws_env[i]; i++) { - size_t j; - const char *var = wsp->ws_env[i]; - - for (j = 0; j < len; j++) - if (name[j] != var[j]) + + if (wsp->ws_flags & WRDSF_ENV_KV) { + /* A key-value pair environment */ + for (i = 0; wsp->ws_env[i]; i++) { + size_t elen = strlen(wsp->ws_env[i]); + if (elen == len + && memcmp(wsp->ws_env[i], name, elen) == 0) + return wsp->ws_env[i + 1]; + /* Skip the value. Break the loop if it is NULL. */ + i++; + if (wsp->ws_env[i] == NULL) break; - if (j == len && var[j] == '=') - return var + j + 1; + } + } else { + /* Usual (A=B) environment. */ + for (i = 0; wsp->ws_env[i]; i++) { + size_t j; + const char *var = wsp->ws_env[i]; + + for (j = 0; j < len; j++) + if (name[j] != var[j]) + break; + if (j == len && var[j] == '=') + return var + j + 1; + } } return NULL; } @@ -610,11 +644,11 @@ expvar(struct wordsplit *wsp, const char *str, size_t len, { size_t i = 0; const char *defstr = NULL; - char *value; + const char *value; const char *vptr; struct wordsplit_node *newnode; const char *start = str - 1; - + if (ISALPHA(str[0]) || str[0] == '_') { for (i = 1; i < len; i++) if (!(ISALNUM(str[i]) || str[i] == '_')) @@ -628,7 +662,7 @@ expvar(struct wordsplit *wsp, const char *str, size_t len, break; if (str[i] == ':') { size_t j; - + defstr = str + i + 1; if (find_closing_cbrace(str, i + 1, len, &j)) { wsp->ws_errno = WRDSE_CBRACE; @@ -647,7 +681,7 @@ expvar(struct wordsplit *wsp, const char *str, size_t len, return 1; wsnode_insert(wsp, newnode, *ptail, 0); *ptail = newnode; - newnode->flags = _WSNF_WORD|flg; + newnode->flags = _WSNF_WORD | flg; newnode->v.word = malloc(3); if (!newnode->v.word) return _wsplt_nomem(wsp); @@ -669,7 +703,7 @@ expvar(struct wordsplit *wsp, const char *str, size_t len, if (!value) return _wsplt_nomem(wsp); } else if (wsp->ws_flags & WRDSF_GETVAR) - value = wsp->ws_getvar(str, i); + value = wsp->ws_getvar(str, i, wsp->ws_closure); else if (wsp->ws_flags & WRDSF_UNDEF) { wsp->ws_errno = WRDSE_UNDEF; if (wsp->ws_flags & WRDSF_SHOWERR) @@ -677,31 +711,42 @@ expvar(struct wordsplit *wsp, const char *str, size_t len, return 1; } else { if (wsp->ws_flags & WRDSF_WARNUNDEF) - wsp->ws_error(_("warning: undefined variable `%.*s'"), - i, str); + wsp-> + ws_error(_ + ("warning: undefined variable `%.*s'"), + i, str); if (wsp->ws_flags & WRDSF_KEEPUNDEF) value = NULL; else value = ""; } - /* FIXME: handle defstr */ + /* FIXME: handle defstr */ if (value) { if (flg & _WSNF_QUOTE) { if (wsnode_new(wsp, &newnode)) return 1; wsnode_insert(wsp, newnode, *ptail, 0); *ptail = newnode; - newnode->flags = _WSNF_WORD|_WSNF_NOEXPAND|flg; + newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg; newnode->v.word = strdup(value); if (!newnode->v.word) return _wsplt_nomem(wsp); + } else if (*value == 0) { + /* Empty string is a special case */ + if (wsnode_new(wsp, &newnode)) + return 1; + wsnode_insert(wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_NULL; } else { struct wordsplit ws; int i; - + + ws.ws_delim = wsp->ws_delim; if (wordsplit(value, &ws, WRDSF_NOVAR | WRDSF_NOCMD | - WRDSF_WS | WRDSF_SQUEEZE_DELIMS)) { + WRDSF_DELIM | WRDSF_SQUEEZE_DELIMS)) + { wordsplit_free(&ws); return 1; } @@ -711,10 +756,10 @@ expvar(struct wordsplit *wsp, const char *str, size_t len, wsnode_insert(wsp, newnode, *ptail, 0); *ptail = newnode; newnode->flags = _WSNF_WORD | - _WSNF_NOEXPAND | - (i + 1 < ws.ws_wordc ? - (flg & ~_WSNF_JOIN) - : flg); + _WSNF_NOEXPAND | + (i + 1 < + ws. + ws_wordc ? (flg & ~_WSNF_JOIN) : flg); newnode->v.word = strdup(ws.ws_wordv[i]); if (!newnode->v.word) return _wsplt_nomem(wsp); @@ -723,17 +768,23 @@ expvar(struct wordsplit *wsp, const char *str, size_t len, } } else if (wsp->ws_flags & WRDSF_KEEPUNDEF) { size_t size = *pend - start + 1; - + if (wsnode_new(wsp, &newnode)) return 1; wsnode_insert(wsp, newnode, *ptail, 0); *ptail = newnode; - newnode->flags = _WSNF_WORD|_WSNF_NOEXPAND|flg; + newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg; newnode->v.word = malloc(size + 1); if (!newnode->v.word) return _wsplt_nomem(wsp); memcpy(newnode->v.word, start, size); newnode->v.word[size] = 0; + } else { + if (wsnode_new(wsp, &newnode)) + return 1; + wsnode_insert(wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_NULL; } return 0; } @@ -747,7 +798,7 @@ node_expand_vars(struct wordsplit *wsp, struct wordsplit_node *node) const char *p; size_t off = 0; struct wordsplit_node *tail = node; - + for (p = str; p < end; p++) { if (*p == '\\') { p++; @@ -758,12 +809,13 @@ node_expand_vars(struct wordsplit *wsp, struct wordsplit_node *node) if (tail != node) tail->flags |= _WSNF_JOIN; - if (node_split_prefix(wsp, &tail, node, off, n, - _WSNF_JOIN)) + if (node_split_prefix + (wsp, &tail, node, off, n, _WSNF_JOIN)) return 1; p++; if (expvar(wsp, p, slen - n, &tail, &p, - node->flags & (_WSNF_JOIN|_WSNF_QUOTE))) + node-> + flags & (_WSNF_JOIN | _WSNF_QUOTE))) return 1; off += p - str + 1; str = p + 1; @@ -788,16 +840,52 @@ wordsplit_varexp(struct wordsplit *wsp) { struct wordsplit_node *p; - for (p = wsp->ws_head; p; ) { + for (p = wsp->ws_head; p;) { struct wordsplit_node *next = p->next; if (!(p->flags & _WSNF_NOEXPAND)) if (node_expand_vars(wsp, p)) return 1; p = next; } + + /* Remove NULL lists */ + for (p = wsp->ws_head; p;) { + struct wordsplit_node *next = p->next; + if (p->flags & _WSNF_NULL) { + wsnode_remove(wsp, p); + wsnode_free(p); + } + p = next; + } + return 0; } +/* Strip off any leading and trailing whitespace. This function is called + right after the initial scanning, therefore it assumes that every + node in the list is a text reference node. */ +static void +wordsplit_trimws(struct wordsplit *wsp) +{ + struct wordsplit_node *p; + + for (p = wsp->ws_head; p; p = p->next) { + size_t n; + + if (p->flags & _WSNF_QUOTE) + continue; + + /* Skip leading whitespace: */ + for (n = p->v.segm.beg; + n < p->v.segm.end && ISWS(wsp->ws_input[n]); n++); + p->v.segm.beg = n; + /* Trim trailing whitespace */ + for (n = p->v.segm.end; + n > p->v.segm.beg && ISWS(wsp->ws_input[n - 1]); n--); + p->v.segm.end = n; + } +} + static int skip_sed_expr(const char *command, size_t i, size_t len) { @@ -808,13 +896,14 @@ skip_sed_expr(const char *command, size_t i, size_t len) if (command[i] == ';') i++; - if (!(command[i] == 's' && i + 3 < len - && ISPUNCT(command[i+1]))) + if (! + (command[i] == 's' && i + 3 < len + && ISPUNCT(command[i + 1]))) break; delim = command[++i]; state = 1; - for (i++; i < len; i++) { + for (i++; i < len; i++) { if (state == 3) { if (command[i] == delim || !ISALNUM(command[i])) @@ -824,7 +913,8 @@ skip_sed_expr(const char *command, size_t i, size_t len) else if (command[i] == delim) state++; } - } while (state == 3 && i < len && command[i] == ';'); + } + while (state == 3 && i < len && command[i] == ';'); return i; } @@ -836,7 +926,7 @@ skip_delim(struct wordsplit *wsp) do start++; while (start < wsp->ws_len - && ISDELIM(wsp->ws_input[start], wsp->ws_delim)); + && ISDELIM(wsp, wsp->ws_input[start])); start--; } @@ -851,18 +941,18 @@ skip_delim(struct wordsplit *wsp) #define _WRDS_ERR 2 static int -scan_qstring(struct wordsplit *wsp, size_t start, size_t *end) +scan_qstring(struct wordsplit *wsp, size_t start, size_t * end) { size_t j; const char *command = wsp->ws_input; size_t len = wsp->ws_len; char q = command[start]; - + for (j = start + 1; j < len && command[j] != q; j++) if (q == '"' && command[j] == '\\') j++; if (j < len && command[j] == q) { - int flags = _WSNF_QUOTE; + int flags = _WSNF_QUOTE | _WSNF_EMPTYOK; if (q == '\'') flags |= _WSNF_NOEXPAND; if (wordsplit_add_segm(wsp, start + 1, j, flags)) @@ -883,41 +973,30 @@ scan_word(struct wordsplit *wsp, size_t start) { size_t len = wsp->ws_len; const char *command = wsp->ws_input; - const char *delim = wsp->ws_delim; const char *comment = wsp->ws_comment; int join = 0; int flags = 0; - + size_t i = start; if (i >= len) { wsp->ws_errno = WRDSE_EOF; return _WRDS_EOF; } - - if ((wsp->ws_flags & WRDSF_WS) - && !(wsp->ws_flags & WRDSF_RETURN_DELIMS)) { - /* Skip initial whitespace */ - while (ISWS(command[i])) - if (++i == len) { - wsp->ws_errno = WRDSE_EOF; - return _WRDS_EOF; - } - } start = i; if (wsp->ws_flags & WRDSF_SED_EXPR - && command[i] == 's' && i + 3 < len && ISPUNCT(command[i+1])) { + && command[i] == 's' && i + 3 < len + && ISPUNCT(command[i + 1])) { flags = _WSNF_SEXP; i = skip_sed_expr(command, i, len); - } else if (!ISDELIM(command[i], delim)) { + } else if (!ISDELIM(wsp, command[i])) { while (i < len) { if (comment && strchr(comment, command[i]) != NULL) { size_t j; - for (j = i + 1; j < len - && command[j] != '\n'; j++) - ; + for (j = i + 1; + j < len && command[j] != '\n'; j++); if (wordsplit_add_segm(wsp, start, i, 0)) return _WRDS_ERR; wsp->ws_endp = j; @@ -925,19 +1004,20 @@ scan_word(struct wordsplit *wsp, size_t start) } if (wsp->ws_flags & WRDSF_QUOTE) { - if (command[i] == '\\') { + if (command[i] == '\\') { if (++i == len) break; i++; continue; } - - if (command[i] == '\'' || command[i] == '"') { + + if (command[i] == '\'' + || command[i] == '"') { if (join && wsp->ws_tail) - wsp->ws_tail->flags - |= _WSNF_JOIN; - if (wordsplit_add_segm(wsp, start, - i, _WSNF_JOIN)) + wsp->ws_tail->flags |= + _WSNF_JOIN; + if (wordsplit_add_segm + (wsp, start, i, _WSNF_JOIN)) return _WRDS_ERR; if (scan_qstring(wsp, i, &i)) return _WRDS_ERR; @@ -946,8 +1026,7 @@ scan_word(struct wordsplit *wsp, size_t start) } } - if (((wsp->ws_flags & WRDSF_WS) && ISWS(command[i])) - || ISDELIM(command[i], delim)) + if (ISDELIM(wsp, command[i])) break; else i++; @@ -955,9 +1034,10 @@ scan_word(struct wordsplit *wsp, size_t start) } else if (wsp->ws_flags & WRDSF_RETURN_DELIMS) { do { i++; - } while (i < len && ISDELIM(command[i], delim)); - } - + } + while (i < len && ISDELIM(wsp, command[i])); + } else if (!(wsp->ws_flags & WRDSF_SQUEEZE_DELIMS)) + flags |= _WSNF_EMPTYOK; if (join && i > start && wsp->ws_tail) wsp->ws_tail->flags |= _WSNF_JOIN; @@ -998,13 +1078,12 @@ wordsplit_c_quote_char(int c) #define to_num(c) \ (ISDIGIT(c) ? c - '0' : (ISXDIGIT(c) ? toupper(c) - 'A' + 10 : 255 )) -static int -xtonum(int *pval, const char *src, int base, int cnt) +static int xtonum(int *pval, const char *src, int base, int cnt) { int i, val; for (i = 0, val = 0; i < cnt; i++, src++) { - int n = *(unsigned char*)src; + int n = *(unsigned char *) src; if (n > 127 || (n = to_num(n)) >= base) break; val = val * base + n; @@ -1020,13 +1099,14 @@ wordsplit_c_quoted_length(const char *str, int quote_hex, int *quote) *quote = 0; for (; *str; str++) { - if (*str == ' ') { - len++; + if (strchr(" \"", *str)) *quote = 1; - } else if (*str == '"') { + + if (*str == ' ') + len++; + else if (*str == '"') len += 2; - *quote = 1; - } else if (*str != '\t' && *str != '\\' && ISPRINT(*str)) + else if (*str != '\t' && *str != '\\' && ISPRINT(*str)) len++; else if (quote_hex) len += 3; @@ -1072,19 +1152,19 @@ wordsplit_c_unquote_copy(char *dst, const char *src, size_t n) if (off == 0) { *dst++ = '\\'; *dst++ = src[i++]; - } - else { + } else { *dst++ = c; i += off + 1; } } - } else if ((unsigned char)src[i] < 128 + } else if ((unsigned char) src[i] < 128 && ISDIGIT(src[i])) { if (n - i < 1) { *dst++ = '\\'; *dst++ = src[i++]; } else { - int off = xtonum(&c, src+i, 8, 3); + int off = + xtonum(&c, src + i, 8, 3); if (off == 0) { *dst++ = '\\'; *dst++ = src[i++]; @@ -1094,7 +1174,8 @@ wordsplit_c_unquote_copy(char *dst, const char *src, size_t n) } } } else - *dst++ = wordsplit_c_unquote_char(src[i++]); + *dst++ = + wordsplit_c_unquote_char(src[i++]); } else *dst++ = src[i++]; } @@ -1115,7 +1196,7 @@ wordsplit_c_quote_copy(char *dst, const char *src, int quote_hex) if (quote_hex) { snprintf(tmp, sizeof tmp, "%%%02X", - *(unsigned char*)src); + *(unsigned char *) src); memcpy(dst, tmp, 3); dst += 3; } else { @@ -1125,7 +1206,7 @@ wordsplit_c_quote_copy(char *dst, const char *src, int quote_hex) *dst++ = c; else { snprintf(tmp, sizeof tmp, "%03o", - *(unsigned char*)src); + *(unsigned char *) src); memcpy(dst, tmp, 3); dst += 3; } @@ -1155,6 +1236,9 @@ wordsplit_len(const char *command, size_t len, struct wordsplit *wsp, } else { while ((rc = scan_word(wsp, start)) == _WRDS_OK) start = skip_delim(wsp); + /* Make sure tail element is not joinable */ + if (wsp->ws_tail) + wsp->ws_tail->flags &= ~_WSNF_JOIN; } if (wsp->ws_flags & WRDSF_SHOWDBG) { @@ -1166,7 +1250,16 @@ wordsplit_len(const char *command, size_t len, struct wordsplit *wsp, return wsp->ws_errno; } - /* FIXME: Expand variables & commands here */ + if (wsp->ws_flags & WRDSF_WS) { + /* Trim leading and trailing whitespace */ + wordsplit_trimws(wsp); + if (wsp->ws_flags & WRDSF_SHOWDBG) { + wsp->ws_debug("After WS trimming:"); + wordsplit_dump_nodes(wsp); + } + } + + /* Expand variables (FIXME: & commands) */ if (!(wsp->ws_flags & WRDSF_NOVAR)) { if (wordsplit_varexp(wsp)) { wordsplit_free_nodes(wsp); @@ -1185,17 +1278,18 @@ wordsplit_len(const char *command, size_t len, struct wordsplit *wsp, wsp->ws_debug("After quote removal:"); wordsplit_dump_nodes(wsp); } - + if (wsnode_coalesce(wsp)) break; - + if (wsp->ws_flags & WRDSF_SHOWDBG) { wsp->ws_debug("Coalesced list:"); wordsplit_dump_nodes(wsp); } wordsplit_finish(wsp); - } while (0); + } + while (0); wordsplit_free_nodes(wsp); return wsp->ws_errno; } @@ -1203,12 +1297,28 @@ wordsplit_len(const char *command, size_t len, struct wordsplit *wsp, int wordsplit(const char *command, struct wordsplit *ws, int flags) { - return wordsplit_len(command, strlen (command), ws, flags); + return wordsplit_len(command, strlen(command), ws, flags); +} + +void +wordsplit_free_words(struct wordsplit *ws) +{ + size_t i; + + for (i = 0; i < ws->ws_wordc; i++) { + char *p = ws->ws_wordv[ws->ws_offs + i]; + if (p) { + free(p); + ws->ws_wordv[ws->ws_offs + i] = NULL; + } + } + ws->ws_wordc = 0; } void wordsplit_free(struct wordsplit *ws) { + wordsplit_free_words(ws); free(ws->ws_wordv); ws->ws_wordv = NULL; } @@ -1220,7 +1330,7 @@ wordsplit_perror(struct wordsplit *wsp) case WRDSE_EOF: wsp->ws_error(_("no error")); break; - + case WRDSE_QUOTE: wsp->ws_error(_("missing closing %c (start near #%lu)"), wsp->ws_input[wsp->ws_endp], @@ -1230,9 +1340,11 @@ wordsplit_perror(struct wordsplit *wsp) case WRDSE_NOSPACE: wsp->ws_error(_("memory exhausted")); break; - + case WRDSE_NOSUPP: - wsp->ws_error(_("command substitution is not yet supported")); + wsp-> + ws_error(_ + ("command substitution is not yet supported")); case WRDSE_USAGE: wsp->ws_error(_("invalid wordsplit usage")); @@ -1245,7 +1357,7 @@ wordsplit_perror(struct wordsplit *wsp) case WRDSE_UNDEF: wsp->ws_error(_("undefined variable")); break; - + default: wsp->ws_error(_("unknown error")); } @@ -1261,7 +1373,8 @@ const char *_wordsplit_errstr[] = { N_("unbalanced curly brace"), N_("undefined variable") }; -int _wordsplit_nerrs = sizeof(_wordsplit_errstr)/sizeof(_wordsplit_errstr[0]); +int _wordsplit_nerrs = + sizeof(_wordsplit_errstr) / sizeof(_wordsplit_errstr[0]); const char * wordsplit_strerror(struct wordsplit *ws) @@ -1270,4 +1383,3 @@ wordsplit_strerror(struct wordsplit *ws) return _wordsplit_errstr[ws->ws_errno]; return N_("unknown error"); } - |