diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2014-10-28 15:40:20 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2014-10-28 17:45:43 +0200 |
commit | c28a77d5990eba3734d6d85002a52cf4c332124a (patch) | |
tree | 8dd20f4ff136b6c96e1caa6b4942b9ad6dde4b7f | |
parent | cb11e89626cc2a2bf1b63cdebc32a498fd65dd59 (diff) | |
download | grecs-c28a77d5990eba3734d6d85002a52cf4c332124a.tar.gz grecs-c28a77d5990eba3734d6d85002a52cf4c332124a.tar.bz2 |
Improve wordsplit
* src/wordsplit.c: Implement default assignment, word
expansion in variable defaults, distinction between
${variable:-word} and ${variable-word}.
* doc/wordsplit.3: New file.
* src/wordsplit.h (wordsplit)<ws_envbuf,ws_envidx>
<ws_envsiz>: New members.
(WRDSF_ARGV): Remove.
(WRDSF_OPTIONS): New flag.
(WRDSO_ARGV): New option bit.
* tests/wordsplit.at: Add new tests.
* tests/wsp.c: Set WRDSF_OPTIONS flag if one of the options is requested.
-rw-r--r-- | doc/wordsplit.3 | 585 | ||||
-rw-r--r-- | src/wordsplit.c | 368 | ||||
-rw-r--r-- | src/wordsplit.h | 37 | ||||
-rw-r--r-- | tests/wordsplit.at | 41 | ||||
-rw-r--r-- | tests/wsp.c | 76 |
5 files changed, 1002 insertions, 105 deletions
diff --git a/doc/wordsplit.3 b/doc/wordsplit.3 new file mode 100644 index 0000000..2f0cced --- /dev/null +++ b/doc/wordsplit.3 @@ -0,0 +1,585 @@ +.\" This file is part of grecs -*- nroff -*- +.\" Copyright (C) 2007, 2009-2014 Sergey Poznyakoff +.\" +.\" Grecs is free software; you can redistribute it and/or modify +.\" it under the terms of the GNU General Public License as published by +.\" the Free Software Foundation; either version 3, or (at your option) +.\" any later version. +.\" +.\" Grecs is distributed in the hope that it will be useful, +.\" but WITHOUT ANY WARRANTY; without even the implied warranty of +.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +.\" GNU General Public License for more details. +.\" +.\" You should have received a copy of the GNU General Public License +.\" along with Grecs. If not, see <http://www.gnu.org/licenses/>. +.\" +.TH WORDSPLIT 3 "October 28, 2014" "GRECS" "Grecs User Reference" +.SH NAME +wordsplit \- split string into words +.SH SYNOPSIS +.B #include <wordsplit.h> +.sp +\fBint wordsplit (const char *\fIs\fB,\ + wordsplit_t *\fIws\fB, int \fIflags\fB);\fR +.sp +\fBint wordsplit_len (const char *\fIs\fB,\ + \fBsize_t \fIlen\fR,\ + \fBwordsplit_t *\fIp\fB,\ + int \fIflags\fB); +.sp +\fBvoid wordsplit_free (wordsplit_t *\fIp\fB);\fR +.sp +\fBvoid wordsplit_free_words (wordsplit_t *\fIws\fB);\fR +.sp +\fBvoid wordsplit_perror (wordsplit_t *\fIws\fB);\fR +.sp +\fBconst char *wordsplit_strerror (wordsplit_t *\fIws\fB);\fR +.sp +\fBvoid wordsplit_clearerr (wordsplit_t *\fIws\fB);\fR +.SH DESCRIPTION +The function \fBwordsplit\fR splits the string \fIs\fR into words +using a set of rules governed by \fIflags\fR and stores the result +in the memory location pointed to by \fIws\fR. Depending on +\fIflags\fR, the function performs the following: whitespace trimming, +tilde expansion, variable expansion, quote removal, command +substitution, and path expansion. 
On success, the function returns 0 +and stores the words found in the member \fBws_wordv\fR and the number +of words in the member \fBws_wordc\fR. On error, -1 is returned and +error code is stored in \fBws_errno\fR. +.PP +The function \fBwordsplit_len\fR acts similarly, except that it +accesses only first \fBlen\fR bytes of the string \fIs\fR, which is +not required to be null-terminated. +.PP +When no longer needed, the resources allocated by a call to one of +these functions must be freed using +.BR wordsplit_free . +.PP +The function +.B wordsplit_free_words +frees only the memory allocated for elements of +.I ws_wordv +and initializes +.I ws_wordc +to zero. +.PP +The usual calling sequence is: +.PP +.EX +wordsplit_t ws; +int rc; + +if (wordsplit(s, &ws, WRDSF_DEFFLAGS)) { + wordsplit_perror(&ws); + return; +} +for (i = 0; i < ws.ws_wordc; i++) { + /* do something with ws.ws_wordv[i] */ +} +wordsplit_free(&ws); +.EE +.PP +The function +.B wordsplit_perror +prints error message from the last invocation of \fBwordsplit\fR. It +uses the function pointed to by the +.I ws_error +member. By default, it outputs the message on the standard error. +.PP +For more sophisticated error reporting, the function +.B wordsplit_strerror +can be used. It returns a pointer to the string describing the error. +The caller should treat this pointer as a constant string. It should +not try to alter or deallocate it. +.PP +The function +.B wordsplit_clearerr +clears the error condition associated with \fIws\fR. +.SH EXPANSION +The number of expansions performed on the input is controlled by +appropriate bits set in the \fIflags\fR argument. Whatever expansions +are enabled, they are always run in the same order as described in this +section. +.SS Whitespace trimming +Whitespace trimming removes any leading and trailing whitespace from +the initial word array. It is enabled by the +.B WRDSF_WS +flag. 
Whitespace trimming is needed only if you redefine +word delimiters (\fIws_delim\fR member) so that they don't contain +whitespace characters (\fB\(dq \\t\\n\(dq\fR). +.SS Tilde expansion +Tilde expansion is enabled if the +.B WRDSF_PATHEXPAND +bit is set. It expands all words that begin with an unquoted tilde +character (`\fB~\fR'). If tilde is followed immediately by a slash, +it is replaced with the home directory of the current user (as +determined by his \fBpasswd\fR entry). A tilde alone is handled the +same way. Otherwise, the characters between the tilde and first slash +character (or end of string, if it doesn't contain any) are treated as +a login name, and are replaced (along with the tilde itself) with the +home directory of that user. If there is no user with such login +name, the word is left unchanged. +.SS Variable expansion +Variable expansion replaces each occurrence of +.BI $ NAME +or +.BI ${ NAME } +with the value of the variable \fINAME\fR. It is enabled if the +flag \fBWRDSF_NOVAR\fR is not set. The caller is responsible for +supplying the table of available variables. Two mechanisms are +provided: environment array and a callback function. +.PP +Environment array is a \fBNULL\fR-terminated array of variables, +stored in the \fIws_env\fR member. The \fBWRDSF_ENV\fR flag must be +set in order to instruct \fBwordsplit\fR to use this array. +.PP +By default, elements of the \fIws_env\fR array have the form +.IR NAME = VALUE . +An alternative format is enabled by the +.B WRDSF_ENV_KV +flag. When it is set, each variable is described by two consecutive +elements in the array: +.IR ws_env [ n ] +contains variable name, and +.IR ws_env [ "n+1" ] +contains its value. +.PP +More sophisticated variable tables can be implemented using +callback function. The \fIws_getvar\fR member should be set to point +to that function and \fBWRDSF_GETVAR\fR flag must be set.
The +function itself should be defined as +.EX +int getvar (char **ret, const char *var, size_t len, void *clos); +.EE +.PP +The function should look up the variable identified by the first +\fIlen\fR bytes of the string \fIvar\fR. If such variable is found, +the function stores a copy of its value (allocated using +\fBmalloc\fR(3)) in the memory location pointed to by \fBret\fR, and +returns \fBWRDSE_OK\fR. If the variable is not found, the function +returns \fBWRDSE_UNDEF\fR. Otherwise, a non-zero error code is +returned. +.PP +If \fIws_getvar\fR returns +.BR WRDSE_USERERR , +it must store the pointer to the error description string in +.BR *ret . +In any case (whether returning \fB0\fR or \fBWRDSE_USERERR\fR), the +data returned in \fBret\fR must be allocated using +.BR malloc (3). +.PP +If both +.I ws_env +and +.I ws_getvar +are used, the variable is first looked up in +.IR ws_env , +and if not found there, the +.I ws_getvar +function is called. +.PP +During variable expansion, the forms below cause +.B wordsplit +to test for a variable that is unset or null. Omitting the +colon results in a test only for a variable that is unset. +.TP +.BI ${ variable :- word } +.BR "Use Default Values" . +If \fIvariable\fR is unset or null, the expansion of \fIword\fR is substituted. +Otherwise, the value of \fIvariable\fR is substituted. +.TP +.BI ${ variable := word } +.BR "Assign Default Values" . +If \fIvariable\fR is unset or null, the expansion of \fIword\fR is +assigned to \fIvariable\fR. The value of \fIvariable\fR is then substituted. +.TP +.BI ${ variable :? word } +.BR "Display Error if Null or Unset" . +If \fIvariable\fR is null or unset, the expansion of \fIword\fR (or a +message to that effect if word is not present) is output using +.IR ws_error . +Otherwise, the value of \fIvariable\fR is substituted. +.TP +.BI ${ variable :+ word } +.BR "Use Alternate Value" .
+If \fIvariable\fR is null or unset, nothing is substituted, otherwise the +expansion of \fIword\fR is substituted. +.SS Quote removal +.SS Command substitution +.SS Path expansion +.SH WORDSPLIT_T STRUCTURE +The data type \fBwordsplit_t\fR has three members that contain +output data upon return from \fBwordsplit\fR or \fBwordsplit_len\fR, +and a number of members that the caller can initialize on input in +order to customize the function behavior. Each such member has a +corresponding flag bit, which must be set in the \fIflags\fR argument +in order to instruct the \fBwordsplit\fR function to use it. +.SS OUTPUT +.TP +.BI size_t " ws_wordc" +Number of words in \fIws_wordv\fR. Accessible upon successful return +from \fBwordsplit\fR. +.TP +.BI "char ** " ws_wordv +Array of resulting words. Accessible upon successful return +from \fBwordsplit\fR. +.TP +.BI "int " ws_errno +Error code, if the invocation of \fBwordsplit\fR or +\fBwordsplit_len\fR failed. This is the same value as returned from +the function in that case. +.SS INPUT +.TP +.BI "size_t " ws_offs +If the +.B WRDSF_DOOFFS +flag is set, this member specifies the number of initial elements in +.I ws_wordv +to fill with NULLs. These elements are not counted in the returned +.IR ws_wordc . +.TP +.BI "int " ws_flags +Contains flags passed to wordsplit on entry. Can be used as a +read-only member when using \fBwordsplit\fR in incremental mode or +in a loop with +.B WRDSF_REUSE +flag set. +.TP +.BI "int " ws_options +Additional options used when +.B WRDSF_OPTIONS +is set. +.TP +.BI "const char *" ws_delim +Word delimiters. If initialized on input, the +.B WRDSF_DELIM +flag must be set. Otherwise, it is initialized on entry to +.B wordsplit +with the string \fB\(dq \\t\\n\(dq\fR. +.TP +.BI "const char *" ws_comment +A zero-terminated string of characters that begin an inline comment. +If initialized on input, the +.B WRDSF_COMMENT +flag must be set. By default, its value is \fB\(dq#\(dq\fR.
+.TP +.BI "const char *" ws_escape +Characters to be escaped with backslash. The +.B WRDSF_ESCAPE +flag must be set if this member is initialized. +.TP +.BI "void (*" ws_alloc_die ") (wordsplit_t *)" +This function is called when +.B wordsplit +is unable to allocate memory and the +.B WRDSF_ENOMEMABRT +flag was set. The default function prints a +message on standard error and aborts. This member can be used +to customize error handling. If initialized, the +.B WRDSF_ALLOC_DIE +flag must be set. +.TP +.BI "void (*" ws_error ") (const char *, ...)" +Pointer to function used for error reporting. The invocation +convention is the same as for +.BR printf (3). +The default function formats and prints the message on the standard +error. + +If this member is initialized, the +.B WRDSF_ERROR +flag must be set. +.TP +.BI "void (*" ws_debug ") (const char *, ...)" +Pointer to function used for debugging output. By default it points +to the same function as +.BR ws_error . +If initialized, the +.B WRDSF_DEBUG +flag must be set. +.TP +.BR "const char **" ws_env +A \fBNULL\fR-terminated array of environment variables. It is used +during variable expansion. If set, the +.B WRDSF_ENV +flag must be set. Variable expansion is enabled only if either +.B WRDSF_ENV +or +.B WRDSF_GETVAR +(see below) is set, and +.B WRDSF_NOVAR +flag is not set. + +Each element of +.I ws_env +must have the form \fB\(dq\fINAME\fB=\fIVALUE\fR, where \fINAME\fR is +the name of the variable, and \fIVALUE\fR is its value. +Alternatively, if the \fBWRDSF_ENV_KV\fR flag is set, each variable is +described by two elements of +.IR ws_env : one containing variable name, and the next one with its +value. +.TP +.BI "int (*" ws_getvar ") (char **ret, const char *var, size_t len, void *clos)" +Points to the function that will be used during variable expansion to +look up for the value of the environment variable named \fBvar\fR. +This function is used if the variable expansion is enabled (i.e. 
the +.B WRDSF_NOVAR +flag is not set), and the \fBWRDSF_GETVAR\fR flag is set. + +If both +.B WRDSF_ENV +and +.B WRDSF_GETVAR +are set, the variable is first looked up in the +.I ws_env +array and, if not found there, +.I ws_getvar +is called. + +The name of the variable is specified by the first \fIlen\fR bytes of +the string \fIvar\fR. The \fIclos\fR parameter supplies the +user-specific data (see below the description of \fIws_closure\fR +member) and the \fBret\fR parameter points to the memory location used +for output data. On success, the function must store there a pointer +to the string with the value of the variable and return 0. On error, +it must return one of the error codes described in the section +.BR "ERROR CODES" . +If \fIws_getvar\fR returns +.BR WRDSE_USERERR , +it must store the pointer to the error description string in +.BR *ret . +In any case (whether returning \fB0\fR or \fBWRDSE_USERERR\fR), the +data returned in \fBret\fR must be allocated using +.BR malloc (3). +.TP +.BI "void *" ws_closure +Additional user-specific data passed as the last argument to +.I ws_getvar +or +.I ws_command +(see below). If defined, the +.B WRDSF_CLOSURE +flag must be set. +.TP +\fBint (*\fIws_command\fB)\ + (char **ret,\ + const char *cmd,\ + size_t len,\ + char **argv,\ + void *clos) +Pointer to the function that performs command substitution. It treats +the first \fIlen\fR bytes of the string \fIcmd\fR as a command +(whatever it means for the caller) and attempts to execute it. On +success, a pointer to the string with the command output is stored +in the memory location pointed to by \fBret\fR and \fB0\fR is +returned. On error, +the function must return one of the error codes described in the section +.BR "ERROR CODES" . +If \fIws_command\fR returns +.BR WRDSE_USERERR , +it must store the pointer to the error description string in +.BR *ret .
+In any case (whether returning \fB0\fR or \fBWRDSE_USERERR\fR), the +data returned in \fBret\fR must be allocated using +.BR malloc (3). + +If the +.I WRDSO_ARGV +option is set, the parameter \fBargv\fR contains the command split into +words using the same settings as the input \fIws\fR structure, with +command substitution disabled. + +The \fIclos\fR parameter supplies user-specific data (see the +description of \fIws_closure\fR member). +.SH FLAGS +The following macros are defined for use in the \fBflags\fR argument. +.TP +.B WRDSF_DEFFLAGS +Default flags. This is a shortcut for: + +\fB(WRDSF_NOVAR |\ + WRDSF_NOCMD |\ + WRDSF_QUOTE |\ + WRDSF_SQUEEZE_DELIMS |\ + WRDSF_CESCAPES)\fR, + +i.e.: disable variable expansion and command substitution, perform quote +removal, treat any number of consecutive delimiters as a single +delimiter, replace \fBC\fR escapes appearing in the input string with +the corresponding characters. +.TP +.B WRDSF_APPEND +Append the words found to the array resulting from a previous call to +\fBwordsplit\fR. +.TP +.B WRDSF_DOOFFS +Insert +.I ws_offs +initial +.BR NULL s +in the array +.IR ws_wordv . +These are not counted in the returned +.IR ws_wordc . +.TP +.B WRDSF_NOCMD +Don't do command substitution. +.TP +.B WRDSF_REUSE +The parameter \fIws\fR resulted from a previous call to +\fBwordsplit\fR, and \fBwordsplit_free\fR was not called. Reuse the +allocated storage. +.TP +.B WRDSF_SHOWERR +Print errors using +.BR ws_error . +.TP +.B WRDSF_UNDEF +Consider it an error if an undefined variable is expanded. +.TP +.B WRDSF_NOVAR +Don't do variable expansion. +.TP +.B WRDSF_ENOMEMABRT +Abort on +.B ENOMEM +error. By default, out of memory errors are treated as any other +errors: the error is reported using \fIws_error\fR if the +.B WRDSF_SHOWERR +flag is set, and error code is returned. If this flag is set, the +.B ws_alloc_die +function is called instead. This function is not supposed to return.
+.TP +.B WRDSF_WS +Trim off any leading and trailing whitespace from the returned +words. This flag is useful if the \fIws_delim\fR member does not +contain whitespace characters. +.TP +.B WRDSF_SQUOTE +Handle single quotes. +.TP +.B WRDSF_DQUOTE +Handle double quotes. +.TP +.B WRDSF_QUOTE +A shortcut for \fB(WRDSF_SQUOTE|WRDSF_DQUOTE)\fR. +.TP +.B WRDSF_SQUEEZE_DELIMS +Replace each input sequence of repeated delimiters with a single +delimiter. +.TP +.B WRDSF_RETURN_DELIMS +Return delimiters. +.TP +.B WRDSF_SED_EXPR +Treat +.BR sed (1) expressions as words. +.TP +.B WRDSF_DELIM +.I ws_delim +member is initialized. +.TP +.B WRDSF_COMMENT +.I ws_comment +member is initialized. +.TP +.B WRDSF_ALLOC_DIE +.I ws_alloc_die +member is initialized. +.TP +.B WRDSF_ERROR +.I ws_error +member is initialized. +.TP +.B WRDSF_DEBUG +.I ws_debug +member is initialized. +.TP +.B WRDSF_ENV +.I ws_env +member is initialized. +.TP +.B WRDSF_GETVAR +.I ws_getvar member is initialized. +.TP +.B WRDSF_SHOWDBG +Enable debugging. +.TP +.B WRDSF_NOSPLIT +Don't split input into words. This flag is useful for side +effects, e.g. to perform variable expansion within a string. +.TP +.B WRDSF_KEEPUNDEF +Keep undefined variables in place, instead of expanding them to +empty strings. +.TP +.B WRDSF_WARNUNDEF +Warn about undefined variables. +.TP +.B WRDSF_CESCAPES +Handle \fBC\fR-style escapes in the input string. +.TP +.B WRDSF_CLOSURE +.I ws_closure +is set. +.TP +.B WRDSF_ENV_KV +Each two consecutive elements in the +.I ws_env +array describe a single variable: +.IR ws_env [ n ] +contains variable name, and +.IR ws_env [ "n+1" ] +contains its value. +.TP +.B WRDSF_ESCAPE 0x10000000 +.I ws_escape +is set. +.TP +.B WRDSF_INCREMENTAL +Incremental mode. Each subsequent call to \fBwordsplit\fR with +\fBNULL\fR as its first argument parses the next word from the input. +See the section +.B INCREMENTAL MODE +for a detailed discussion.
+.TP +.B WRDSF_PATHEXPAND +Perform pathname and tilde expansion. If this flag is set, the +\fIws_options\fR member must also be initialized. See the +subsection +.B "Pathname expansion" +for details. +.TP +.B WRDSF_OPTIONS +The +.I ws_options +member is initialized. +.SH "RETURN VALUE" +.SH EXAMPLE +.SH "SEE ALSO" +.SH AUTHORS +Sergey Poznyakoff +.SH "BUG REPORTS" +Report bugs to <gray+grecs@gnu.org.ua>. +.SH COLOPHON +The \fBGrecs\fR library is constantly changing, so this manual page +may be incorrect or out-of-date. For the latest copy of \fBGrecs\fR +documentation, visit <http://www.gnu.org.ua/software/grecs>. +.SH COPYRIGHT +Copyright \(co 2011 Sergey Poznyakoff +.br +.na +License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> +.br +.ad +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. +.\" Local variables: +.\" eval: (add-hook 'write-file-hooks 'time-stamp) +.\" time-stamp-start: ".TH [A-Z_][A-Z0-9_]* [0-9] \"" +.\" time-stamp-format: "%:B %:d, %:y" +.\" time-stamp-end: "\"" +.\" time-stamp-line-limit: 20 +.\" end: + diff --git a/src/wordsplit.c b/src/wordsplit.c index a836bbc..c726239 100644 --- a/src/wordsplit.c +++ b/src/wordsplit.c @@ -114,11 +114,30 @@ _wsplt_subsplit (struct wordsplit *wsp, struct wordsplit *wss, wss->ws_error = wsp->ws_error; wss->ws_alloc_die = wsp->ws_alloc_die; + if (!(flags & WRDSF_NOVAR)) + { + wss->ws_env = wsp->ws_env; + wss->ws_getvar = wsp->ws_getvar; + flags |= wsp->ws_flags & (WRDSF_ENV | WRDSF_ENV_KV | WRDSF_GETVAR); + } + if (!(flags & WRDSF_NOCMD)) + { + wss->ws_command = wsp->ws_command; + } + + if ((flags & (WRDSF_NOVAR|WRDSF_NOCMD)) != (WRDSF_NOVAR|WRDSF_NOCMD)) + { + wss->ws_closure = wsp->ws_closure; + flags |= wsp->ws_flags & WRDSF_CLOSURE; + } + + wss->ws_options = wsp->ws_options; + flags |= WRDSF_DELIM | WRDSF_ALLOC_DIE | WRDSF_ERROR | WRDSF_DEBUG - | (wsp->ws_flags & (WRDSF_SHOWDBG | WRDSF_SHOWERR)); + | 
(wsp->ws_flags & (WRDSF_SHOWDBG | WRDSF_SHOWERR | WRDSF_OPTIONS)); return wordsplit_run (str, len, wss, flags, wsp->ws_lvl + 1); } @@ -168,12 +187,11 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, if (!(wsp->ws_flags & WRDSF_ERROR)) wsp->ws_error = _wsplt_error; - if (!(wsp->ws_flags & WRDSF_NOVAR) - && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR))) + if (!(wsp->ws_flags & WRDSF_NOVAR)) { - _wsplt_seterr (wsp, WRDSE_USAGE); - errno = EINVAL; - return wsp->ws_errno; + /* These will be initialized on first variable assignment */ + wsp->ws_envidx = wsp->ws_envsiz = 0; + wsp->ws_envbuf = NULL; } if (!(wsp->ws_flags & WRDSF_NOCMD)) @@ -214,6 +232,9 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, if (!(wsp->ws_flags & WRDSF_CLOSURE)) wsp->ws_closure = NULL; + if (!(wsp->ws_flags & WRDSF_OPTIONS)) + wsp->ws_options = 0; + wsp->ws_endp = 0; wordsplit_init0 (wsp); @@ -717,13 +738,14 @@ find_closing_paren (const char *str, size_t i, size_t len, size_t *poff, return 1; } -static const char * -wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len) +static int +wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len, + char const **ret) { size_t i; if (!(wsp->ws_flags & WRDSF_ENV)) - return NULL; + return WRDSE_UNDEF; if (wsp->ws_flags & WRDSF_ENV_KV) { @@ -732,14 +754,17 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len) { size_t elen = strlen (wsp->ws_env[i]); if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0) - return wsp->ws_env[i + 1]; + { + *ret = wsp->ws_env[i + 1]; + return WRDSE_OK; + } /* Skip the value. Break the loop if it is NULL. */ i++; if (wsp->ws_env[i] == NULL) break; } } - else + else if (wsp->ws_env) { /* Usual (A=B) environment. 
*/ for (i = 0; wsp->ws_env[i]; i++) @@ -751,10 +776,117 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len) if (name[j] != var[j]) break; if (j == len && var[j] == '=') - return var + j + 1; + { + *ret = var + j + 1; + return WRDSE_OK; + } + } + } + return WRDSE_UNDEF; +} + +static int +wsplt_assign_var (struct wordsplit *wsp, const char *name, size_t namelen, + char *value) +{ + int n = (wsp->ws_flags & WRDSF_ENV_KV) ? 2 : 1; + char *v; + + if (wsp->ws_envidx + n >= wsp->ws_envsiz) + { + size_t sz; + char **newenv; + + if (!wsp->ws_envbuf) + { + if (wsp->ws_flags & WRDSF_ENV) + { + size_t i = 0, j; + + if (wsp->ws_env) + { + for (; wsp->ws_env[i]; i++) + ; + } + + sz = i + n + 1; + + newenv = calloc (sz, sizeof(newenv[0])); + if (!newenv) + return _wsplt_nomem (wsp); + + for (j = 0; j < i; j++) + { + newenv[j] = strdup (wsp->ws_env[j]); + if (!newenv[j]) + { + for (; j > 1; j--) + free (newenv[j-1]); + free (newenv[j-1]); + return _wsplt_nomem (wsp); + } + } + newenv[j] = NULL; + + wsp->ws_envbuf = newenv; + wsp->ws_envidx = i; + wsp->ws_envsiz = sz; + wsp->ws_env = (const char**) wsp->ws_envbuf; + } + else + { + newenv = calloc (WORDSPLIT_ENV_INIT, sizeof(newenv[0])); + if (!newenv) + return _wsplt_nomem (wsp); + wsp->ws_envbuf = newenv; + wsp->ws_envidx = 0; + wsp->ws_envsiz = WORDSPLIT_ENV_INIT; + wsp->ws_env = (const char**) wsp->ws_envbuf; + wsp->ws_flags |= WRDSF_ENV; + } + } + else + { + wsp->ws_envsiz *= 2; + newenv = realloc (wsp->ws_envbuf, + wsp->ws_envsiz * sizeof (wsp->ws_envbuf[0])); + if (!newenv) + return _wsplt_nomem (wsp); + wsp->ws_envbuf = newenv; + wsp->ws_env = (const char**) wsp->ws_envbuf; } } - return NULL; + + if (wsp->ws_flags & WRDSF_ENV_KV) + { + /* A key-value pair environment */ + char *p = malloc (namelen + 1); + if (!p) + return _wsplt_nomem (wsp); + memcpy (p, name, namelen); + p[namelen] = 0; + + v = strdup (value); + if (!v) + { + free (p); + return _wsplt_nomem (wsp); + } + wsp->ws_env[wsp->ws_envidx++] 
= p; + wsp->ws_env[wsp->ws_envidx++] = v; + } + else + { + v = malloc (namelen + strlen(value) + 2); + if (!v) + return _wsplt_nomem (wsp); + memcpy (v, name, namelen); + v[namelen++] = '='; + strcpy(v + namelen, value); + wsp->ws_env[wsp->ws_envidx++] = v; + } + wsp->ws_env[wsp->ws_envidx++] = NULL; + return WRDSE_OK; } static int @@ -767,7 +899,9 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, const char *vptr; struct wordsplit_node *newnode; const char *start = str - 1; - + int rc; + struct wordsplit ws; + if (ISVARBEG (str[0])) { for (i = 1; i < len; i++) @@ -780,23 +914,35 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, str++; len--; for (i = 1; i < len; i++) - if (str[i] == '}' || str[i] == ':') - break; - if (str[i] == ':') - { - size_t j; - - defstr = str + i + 1; - if (find_closing_paren (str, i + 1, len, &j, "{}")) - return _wsplt_seterr (wsp, WRDSE_CBRACE); - *pend = str + j; - } - else if (str[i] == '}') { - defstr = NULL; - *pend = str + i; + if (str[i] == ':') + { + size_t j; + + defstr = str + i + 1; + if (find_closing_paren (str, i + 1, len, &j, "{}")) + return _wsplt_seterr (wsp, WRDSE_CBRACE); + *pend = str + j; + break; + } + else if (str[i] == '}') + { + defstr = NULL; + *pend = str + i; + break; + } + else if (strchr ("-+?=", str[i])) + { + size_t j; + + defstr = str + i; + if (find_closing_paren (str, i, len, &j, "{}")) + return _wsplt_seterr (wsp, WRDSE_CBRACE); + *pend = str + j; + break; + } } - else + if (i == len) return _wsplt_seterr (wsp, WRDSE_CBRACE); } else @@ -821,64 +967,99 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, i - its length defstr - default replacement str */ - vptr = wordsplit_find_env (wsp, str, i); - if (vptr) + if (defstr && strchr("-+?=", defstr[0]) == 0) { - value = strdup (vptr); - if (!value) - return _wsplt_nomem (wsp); + rc = WRDSE_UNDEF; + defstr = NULL; } - else if (wsp->ws_flags & WRDSF_GETVAR) + else { - int rc = wsp->ws_getvar (&value, str, i, wsp->ws_closure); 
- switch (rc) + rc = wordsplit_find_env (wsp, str, i, &vptr); + if (rc == WRDSE_OK) { - case WRDSE_OK: - break; - - case WRDSE_NOSPACE: - return _wsplt_nomem (wsp); - - case WRDSE_UNDEF: - value = NULL; - break; + value = strdup (vptr); + if (!value) + rc = WRDSE_NOSPACE; + } + else if (wsp->ws_flags & WRDSF_GETVAR) + rc = wsp->ws_getvar (&value, str, i, wsp->ws_closure); + else + rc = WRDSE_UNDEF; - case WRDSE_USERERR: - if (wsp->ws_errno == WRDSE_USERERR) - free (wsp->ws_usererr); - wsp->ws_usererr = value; - /* fall through */ - default: - _wsplt_seterr (wsp, rc); - return 1; + if (rc == WRDSE_OK && value[0] == 0 && defstr && defstr[-1] == ':') + { + free (value); + rc = WRDSE_UNDEF; } } - else - value = NULL; - if (!value) + switch (rc) { + case WRDSE_OK: + if (defstr && *defstr == '+') + { + size_t size = *pend - ++defstr; + + rc = _wsplt_subsplit (wsp, &ws, defstr, size, + WRDSF_NOSPLIT | WRDSF_WS | WRDSF_QUOTE | + (wsp->ws_flags & + (WRDSF_NOVAR | WRDSF_NOCMD))); + if (rc) + return rc; + free (value); + value = ws.ws_wordv[0]; + ws.ws_wordv[0] = NULL; + wordsplit_free (&ws); + } + break; + + case WRDSE_UNDEF: if (defstr) { size_t size; - if (*defstr == '-') + if (*defstr == '-' || *defstr == '=') { size = *pend - ++defstr; - value = malloc (size + 1); - if (!value) - return _wsplt_nomem (wsp); - memcpy (value, defstr, size); - value[size] = 0; + + rc = _wsplt_subsplit (wsp, &ws, defstr, size, + WRDSF_NOSPLIT | WRDSF_WS | WRDSF_QUOTE | + (wsp->ws_flags & + (WRDSF_NOVAR | WRDSF_NOCMD))); + if (rc) + return rc; + + value = ws.ws_wordv[0]; + ws.ws_wordv[0] = NULL; + wordsplit_free (&ws); + + if (defstr[-1] == '=') + wsplt_assign_var (wsp, str, i, value); } - else if (*defstr == '?') + else { - size = *pend - ++defstr; - if (size == 0) - wsp->ws_error (_("%.*s: variable null or not set"), - (int) i, str); - else - wsp->ws_error ("%.*s: %.*s", - (int) i, str, (int) size, defstr); + if (*defstr == '?') + { + size = *pend - ++defstr; + if (size == 0) + wsp->ws_error 
(_("%.*s: variable null or not set"), + (int) i, str); + else + { + rc = _wsplt_subsplit (wsp, &ws, defstr, size, + WRDSF_NOSPLIT | WRDSF_WS | + WRDSF_QUOTE | + (wsp->ws_flags & + (WRDSF_NOVAR | WRDSF_NOCMD))); + if (rc == 0) + wsp->ws_error ("%.*s: %s", + (int) i, str, ws.ws_wordv[0]); + else + wsp->ws_error (_("%.*s: %.*s"), + (int) i, str, (int) size, defstr); + wordsplit_free (&ws); + } + } + value = NULL; } } else if (wsp->ws_flags & WRDSF_UNDEF) @@ -900,18 +1081,21 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, return _wsplt_nomem (wsp); } } + break; + + case WRDSE_NOSPACE: + return _wsplt_nomem (wsp); + + case WRDSE_USERERR: + if (wsp->ws_errno == WRDSE_USERERR) + free (wsp->ws_usererr); + wsp->ws_usererr = value; + /* fall through */ + default: + _wsplt_seterr (wsp, rc); + return 1; } - else if (defstr && *defstr == '+') - { - size_t size = *pend - ++defstr; - free (value); - value = malloc (size + 1); - if (!value) - return _wsplt_nomem (wsp); - memcpy (value, defstr, size); - value[size] = 0; - } - + if (value) { if (flg & _WSNF_QUOTE) @@ -1112,7 +1296,7 @@ expcmd (struct wordsplit *wsp, const char *str, size_t len, } *pend = str + j; - if (wsp->ws_flags & WRDSF_ARGV) + if (wsp->ws_options & WRDSO_ARGV) { struct wordsplit ws; @@ -2035,6 +2219,23 @@ wordsplit_free_words (struct wordsplit *ws) } void +wordsplit_free_envbuf (struct wordsplit *ws) +{ + if (ws->ws_flags & WRDSF_NOCMD) + return; + if (ws->ws_envbuf) + { + size_t i; + + for (i = 0; ws->ws_envbuf[i]; i++) + free (ws->ws_envbuf[i]); + free (ws->ws_envbuf); + ws->ws_envidx = ws->ws_envsiz = 0; + ws->ws_envbuf = NULL; + } +} + +void wordsplit_clearerr (struct wordsplit *ws) { if (ws->ws_errno == WRDSE_USERERR) @@ -2049,6 +2250,7 @@ wordsplit_free (struct wordsplit *ws) wordsplit_free_words (ws); free (ws->ws_wordv); ws->ws_wordv = NULL; + wordsplit_free_envbuf (ws); } const char *_wordsplit_errstr[] = { diff --git a/src/wordsplit.h b/src/wordsplit.h index 6a78c48..3c1d533 100644 --- 
a/src/wordsplit.h +++ b/src/wordsplit.h @@ -19,6 +19,8 @@ #include <stddef.h> +typedef struct wordsplit wordsplit_t; + /* Structure used to direct the splitting. Members marked with [Input] can be defined before calling wordsplit(), those marked with [Output] provide return values when the function returns. If neither mark is @@ -43,7 +45,7 @@ struct wordsplit const char *ws_comment; /* [Input] (WRDSF_COMMENT) Comment characters. */ const char *ws_escape; /* [Input] (WRDSF_ESCAPE) Characters to be escaped with backslash. */ - void (*ws_alloc_die) (struct wordsplit *wsp); + void (*ws_alloc_die) (wordsplit_t *wsp); /* [Input] (WRDSF_ALLOC_DIE) Function called |