diff options
-rw-r--r-- | include/wordsplit.h | 1 | ||||
-rw-r--r-- | src/wordsplit.c | 30 | ||||
-rw-r--r-- | tests/wordsplit.at | 34 |
3 files changed, 62 insertions, 3 deletions
diff --git a/include/wordsplit.h b/include/wordsplit.h index d4975b3..2fac3c6 100644 --- a/include/wordsplit.h +++ b/include/wordsplit.h @@ -99,48 +99,49 @@ struct wordsplit Whatever is stored in RET, it must be allocated using malloc(3). */ void *ws_closure; /* [Input] (WRDSF_CLOSURE) Passed as the CLOS argument to ws_getvar and ws_command. */ int (*ws_command) (char **ret, const char *cmd, size_t len, char **argv, void *clos); /* [Input] (!WRDSF_NOCMD) Returns in the memory location pointed to by RET the expansion of the command CMD (LEN bytes long). On input, ARGV contains CMD split out to words. See ws_getvar for a discussion of possible return values. */ const char *ws_input; /* Input string (the S argument to wordsplit. */ size_t ws_len; /* Length of ws_input. */ size_t ws_endp; /* Points past the last processed byte in ws_input. */ int ws_errno; /* [Output] Error code, if an error occurred. */ char *ws_usererr; /* Points to textual description of the error, if ws_errno is WRDSE_USERERR. Must be allocated with malloc(3). */ struct wordsplit_node *ws_head, *ws_tail; /* Doubly-linked list of parsed out nodes. */ + char ws_sep[2]; /* Temporary storage used during splitting */ int ws_lvl; /* Invocation nesting level. */ }; /* Initial size for ws_env, if allocated automatically */ #define WORDSPLIT_ENV_INIT 16 /* Wordsplit flags. */ /* Append the words found to the array resulting from a previous call. */ #define WRDSF_APPEND 0x00000001 /* Insert ws_offs initial NULLs in the array ws_wordv. (These are not counted in the returned ws_wordc.) */ #define WRDSF_DOOFFS 0x00000002 /* Don't do command substitution. */ #define WRDSF_NOCMD 0x00000004 /* The parameter p resulted from a previous call to wordsplit(), and wordsplit_free() was not called. Reuse the allocated storage. */ #define WRDSF_REUSE 0x00000008 /* Print errors */ #define WRDSF_SHOWERR 0x00000010 /* Consider it an error if an undefined variable is expanded. */ #define WRDSF_UNDEF 0x00000020 /* Don't do variable expansion. */ diff --git a/src/wordsplit.c b/src/wordsplit.c index f563725..4e633fa 100644 --- a/src/wordsplit.c +++ b/src/wordsplit.c @@ -235,48 +235,51 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, } if (wsp->ws_flags & WRDSF_SHOWDBG) { if (!(wsp->ws_flags & WRDSF_DEBUG)) { if (wsp->ws_flags & WRDSF_ERROR) wsp->ws_debug = wsp->ws_error; else if (wsp->ws_flags & WRDSF_SHOWERR) wsp->ws_debug = _wsplt_error; else wsp->ws_flags &= ~WRDSF_SHOWDBG; } } wsp->ws_input = input; wsp->ws_len = len; if (!(wsp->ws_flags & WRDSF_DOOFFS)) wsp->ws_offs = 0; if (!(wsp->ws_flags & WRDSF_DELIM)) wsp->ws_delim = " \t\n"; + wsp->ws_sep[0] = wsp->ws_delim[0]; + wsp->ws_sep[1] = 0; + if (!(wsp->ws_flags & WRDSF_COMMENT)) wsp->ws_comment = NULL; if (!(wsp->ws_flags & WRDSF_CLOSURE)) wsp->ws_closure = NULL; if (!(wsp->ws_flags & WRDSF_OPTIONS)) wsp->ws_options = 0; if (wsp->ws_flags & WRDSF_ESCAPE) { if (!wsp->ws_escape[WRDSX_WORD]) wsp->ws_escape[WRDSX_WORD] = ""; if (!wsp->ws_escape[WRDSX_QUOTE]) wsp->ws_escape[WRDSX_QUOTE] = ""; } else { if (wsp->ws_flags & WRDSF_CESCAPES) { wsp->ws_escape[WRDSX_WORD] = wordsplit_c_escape_tab; wsp->ws_escape[WRDSX_QUOTE] = wordsplit_c_escape_tab; wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD; @@ -328,49 +331,49 @@ alloc_space (struct wordsplit *wsp, size_t count) ptr = realloc (wsp->ws_wordv, newalloc * sizeof (ptr[0])); } else return 0; if (ptr) { wsp->ws_wordn = newalloc; wsp->ws_wordv = ptr; } else return _wsplt_nomem (wsp); return 0; } /* Node state flags */ #define _WSNF_NULL 0x01 /* null node (a noop) */ #define _WSNF_WORD 0x02 /* node contains word in v.word */ #define _WSNF_QUOTE 0x04 /* text is quoted */ #define _WSNF_NOEXPAND 0x08 /* text is not subject to expansion */ #define _WSNF_JOIN 0x10 /* node must be joined with the next node */ #define _WSNF_SEXP 0x20 /* is a sed expression */ #define _WSNF_DELIM 0x40 /* node is a delimiter */ - +#define _WSNF_CONST 0x80 /* with _WSNF_WORD: v.word is constant */ #define _WSNF_EMPTYOK 0x0100 /* special flag indicating that wordsplit_add_segm must add the segment even if it is empty */ struct wordsplit_node { struct wordsplit_node *prev; /* Previous element */ struct wordsplit_node *next; /* Next element */ int flags; /* Node flags */ union { struct { size_t beg; /* Start of word in ws_input */ size_t end; /* End of word in ws_input */ } segm; char *word; } v; }; static const char * wsnode_flagstr (int flags) { static char retbuf[7]; @@ -420,49 +423,49 @@ wsnode_ptr (struct wordsplit *wsp, struct wordsplit_node *p) static size_t wsnode_len (struct wordsplit_node *p) { if (p->flags & _WSNF_NULL) return 0; else if (p->flags & _WSNF_WORD) return strlen (p->v.word); else return p->v.segm.end - p->v.segm.beg; } static int wsnode_new (struct wordsplit *wsp, struct wordsplit_node **pnode) { struct wordsplit_node *node = calloc (1, sizeof (*node)); if (!node) return _wsplt_nomem (wsp); *pnode = node; return 0; } static void wsnode_free (struct wordsplit_node *p) { - if (p->flags & _WSNF_WORD) + if ((p->flags & (_WSNF_WORD|_WSNF_CONST)) == _WSNF_WORD) free (p->v.word); free (p); } static void wsnode_append (struct wordsplit *wsp, struct wordsplit_node *node) { node->next = NULL; node->prev = wsp->ws_tail; if (wsp->ws_tail) wsp->ws_tail->next = node; else wsp->ws_head = node; wsp->ws_tail = node; } static void wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node) { struct wordsplit_node *p; p = node->prev; if (p) { @@ -1229,86 +1232,109 @@ expvar_recover (struct wordsplit *wsp, const char *str, if (wsnode_new (wsp, &newnode)) return 1; wsnode_insert (wsp, newnode, *ptail, 0); *ptail = newnode; newnode->flags = _WSNF_WORD | flg; newnode->v.word = malloc (3); if (!newnode->v.word) return _wsplt_nomem (wsp); newnode->v.word[0] = '$'; newnode->v.word[1] = str[0]; newnode->v.word[2] = 0; *pend = str; return 0; } static int expand_paramv (struct wordsplit *wsp, struct wordsplit_node **ptail, int flg, int q) { struct wordsplit ws; int wsflags = WRDSF_NOVAR | WRDSF_NOCMD | WRDSF_QUOTE | (WSP_RETURN_DELIMS (wsp) ? WRDSF_RETURN_DELIMS : 0) | (q ? WRDSF_NOSPLIT : 0); size_t i; + struct wordsplit_node *tail = *ptail; for (i = 0; i < wsp->ws_paramc; i++) { struct wordsplit_node *np; int rc = _wsplt_subsplit (wsp, &ws, wsp->ws_paramv[i], strlen (wsp->ws_paramv[i]), wsflags, q); if (rc) { _wsplt_seterr_sub (wsp, &ws); wordsplit_free (&ws); return 1; } if (q) { if (wsnode_new (wsp, &np)) return 1; wsnode_insert (wsp, np, *ptail, 0); *ptail = np; np->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg; np->v.word = ws.ws_wordv[0]; ws.ws_wordv[0] = NULL; } else { for (np = ws.ws_head; np; np = np->next) np->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg; wsnode_insert (wsp, ws.ws_head, *ptail, 0); *ptail = ws.ws_tail; ws.ws_head = ws.ws_tail = NULL; } wsflags |= WRDSF_REUSE; } if (wsflags & WRDSF_REUSE) wordsplit_free (&ws); + + if (flg & _WSNF_QUOTE) + { + tail = tail->next; + /* Insert delimiters, mark nodes as joinable */ + while (tail != *ptail) + { + struct wordsplit_node *next = tail->next; + struct wordsplit_node *newnode; + + tail->flags |= _WSNF_JOIN; + + if (wsnode_new (wsp, &newnode)) + return 1; + newnode->flags = _WSNF_WORD | _WSNF_CONST | _WSNF_NOEXPAND | _WSNF_JOIN; + newnode->v.word = wsp->ws_sep; + + wsnode_insert (wsp, newnode, tail, 0); + tail = next; + } + } + return 0; } static int expvar (struct wordsplit *wsp, const char *str, size_t len, struct wordsplit_node **ptail, const char **pend, int flg) { size_t i = 0; const char *defstr = NULL; char *value; const char *vptr; struct wordsplit_node *newnode; const char *start = str - 1; int rc; struct wordsplit ws; int is_param = 0; long param_idx = 0; if (ISVARBEG (str[0])) { for (i = 1; i < len; i++) if (!ISVARCHR (str[i])) break; *pend = str + i - 1; diff --git a/tests/wordsplit.at b/tests/wordsplit.at index 0a9c4d6..0a7d7db 100644 --- a/tests/wordsplit.at +++ b/tests/wordsplit.at @@ -953,46 +953,78 @@ TOTAL: 3 TESTWSP([command nosplit],[],[-nocmd -nocmdsplit], [begin $(words a b) end], [NF: 3 0: begin 1: "$(words a b)" 2: end TOTAL: 3 ]) TESTWSP([positional parameters],[],[one two three four five six seven eight nine ten eleven twelve], [$0 $5 ${10} $#], [NF: 3 0: one 1: six 2: eleven TOTAL: 3 NF: 1 0: 12 TOTAL: 1 ]) TESTWSP([$* and $@],[],['one two' three 'four five'], [$* -$@], +$@ +"$*" +"$@"], [NF: 5 0: one 1: two 2: three 3: four 4: five TOTAL: 5 NF: 3 0: "one two" 1: three 2: "four five" TOTAL: 3 +NF: 1 +0: "one two three four five" +TOTAL: 1 +NF: 1 +0: "one two three four five" +TOTAL: 1 +]) + +TESTWSP([$* and $@ in nosplit mode],[], +[-trimnl -nosplit 'one two' three 'four five'], +[$* +$@], +[NF: 1 +0: "one two three four five" +TOTAL: 1 +NF: 1 +0: "one two three four five" +TOTAL: 1 +]) + +TESTWSP([$* and $@ in nosplit mode with delimiter],[], +[-trimnl -nosplit -delim : 'one two' three 'four five'], +[$* +$@], +[NF: 1 +0: "one two:three:four five" +TOTAL: 1 +NF: 1 +0: "one two:three:four five" +TOTAL: 1 ]) m4_popdef([TESTWSP]) m4_popdef([wspnum]) m4_popdef([wspid]) m4_popdef([genkw]) m4_popdef([wspgroupnum]) m4_popdef([wspgroupname]) m4_popdef([WSPGROUP]) |