aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org.ua>2014-10-28 15:40:20 +0200
committerSergey Poznyakoff <gray@gnu.org.ua>2015-12-17 15:26:28 +0200
commit56a02e741cd8d8b9dce27a79ae9bbcaf1713c4f7 (patch)
tree1153a7dd5c15ab80f1ffa7da4eb9091685222445
parent8383ec3a522a944969b3fc44069a3ff056da554a (diff)
downloadgrecs-56a02e741cd8d8b9dce27a79ae9bbcaf1713c4f7.tar.gz
grecs-56a02e741cd8d8b9dce27a79ae9bbcaf1713c4f7.tar.bz2
Improve wordsplit
* src/wordsplit.c: Implement default assignment, word expansion in variable defaults, distinction between ${variable:-word} and ${variable-word}. * doc/wordsplit.3: New file. * src/wordsplit.h (wordsplit)<ws_envbuf,ws_envidx> <ws_envsiz>: New members. (WRDSF_ARGV): Remove. (WRDSF_OPTIONS): New flag. (WRDSO_ARGV): New option bit. * tests/wordsplit.at: Add new tests. * tests/wsp.c: Set WRDSF_OPTIONS flag if one of the options is requested.
-rw-r--r--doc/wordsplit.3585
-rw-r--r--src/wordsplit.c367
-rw-r--r--src/wordsplit.h37
-rw-r--r--tests/wordsplit.at41
-rw-r--r--tests/wsp.c76
5 files changed, 1002 insertions, 104 deletions
diff --git a/doc/wordsplit.3 b/doc/wordsplit.3
new file mode 100644
index 0000000..2f0cced
--- /dev/null
+++ b/doc/wordsplit.3
@@ -0,0 +1,585 @@
+.\" This file is part of grecs -*- nroff -*-
+.\" Copyright (C) 2007, 2009-2014 Sergey Poznyakoff
+.\"
+.\" Grecs is free software; you can redistribute it and/or modify
+.\" it under the terms of the GNU General Public License as published by
+.\" the Free Software Foundation; either version 3, or (at your option)
+.\" any later version.
+.\"
+.\" Grecs is distributed in the hope that it will be useful,
+.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
+.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+.\" GNU General Public License for more details.
+.\"
+.\" You should have received a copy of the GNU General Public License
+.\" along with Grecs. If not, see <http://www.gnu.org/licenses/>.
+.\"
+.TH WORDSPLIT 3 "October 28, 2014" "GRECS" "Grecs User Reference"
+.SH NAME
+wordsplit \- split string into words
+.SH SYNOPSIS
+.B #include <wordsplit.h>
+.sp
+\fBint wordsplit (const char *\fIs\fB,\
+ wordsplit_t *\fIws\fB, int \fIflags\fB);\fR
+.sp
+\fBint wordsplit_len (const char *\fIs\fB,\
+ \fBsize_t \fIlen\fR,\
+ \fBwordsplit_t *\fIp\fB,\
+ int \fIflags\fB);
+.sp
+\fBvoid wordsplit_free (wordsplit_t *\fIp\fB);\fR
+.sp
+\fBvoid wordsplit_free_words (wordsplit_t *\fIws\fB);\fR
+.sp
+\fBvoid wordsplit_perror (wordsplit_t *\fIws\fB);\fR
+.sp
+\fBconst char *wordsplit_strerror (wordsplit_t *\fIws\fB);\fR
+.sp
+\fBvoid wordsplit_clearerr (wordsplit_t *\fIws\fB);\fR
+.SH DESCRIPTION
+The function \fBwordsplit\fR splits the string \fIs\fR into words
+using a set of rules governed by \fIflags\fR and stores the result
+in the memory location pointed to by \fIws\fR. Depending on
+\fIflags\fR, the function performs the following: whitespace trimming,
+tilde expansion, variable expansion, quote removal, command
+substitution, and path expansion. On success, the function returns 0
+and stores the words found in the member \fBws_wordv\fR and the number
+of words in the member \fBws_wordc\fR. On error, -1 is returned and
+error code is stored in \fBws_errno\fR.
+.PP
+The function \fBwordsplit_len\fR acts similarly, except that it
+accesses only the first \fIlen\fR bytes of the string \fIs\fR, which is
+not required to be null-terminated.
+.PP
+When no longer needed, the resources allocated by a call to one of
+these functions must be freed using
+.BR wordsplit_free .
+.PP
+The function
+.B wordsplit_free_words
+frees only the memory allocated for elements of
+.I ws_wordv
+and initializes
+.I ws_wordc
+to zero.
+.PP
+The usual calling sequence is:
+.PP
+.EX
+wordsplit_t ws;
+size_t i;
+
+if (wordsplit(s, &ws, WRDSF_DEFFLAGS)) {
+ wordsplit_perror(&ws);
+ return;
+}
+for (i = 0; i < ws.ws_wordc; i++) {
+ /* do something with ws.ws_wordv[i] */
+}
+wordsplit_free(&ws);
+.EE
+.PP
+The function
+.B wordsplit_perror
+prints error message from the last invocation of \fBwordsplit\fR. It
+uses the function pointed to by the
+.I ws_error
+member. By default, it outputs the message on the standard error.
+.PP
+For more sophisticated error reporting, the function
+.B wordsplit_strerror
+can be used. It returns a pointer to the string describing the error.
+The caller should treat this pointer as a constant string. It should
+not try to alter or deallocate it.
+.PP
+The function
+.B wordsplit_clearerr
+clears the error condition associated with \fIws\fR.
+.SH EXPANSION
+The number of expansions performed on the input is controlled by
+appropriate bits set in the \fIflags\fR argument. Whatever expansions
+are enabled, they are always run in the same order as described in this
+section.
+.SS Whitespace trimming
+Whitespace trimming removes any leading and trailing whitespace from
+the initial word array. It is enabled by the
+.B WRDSF_WS
+flag. Whitespace trimming is needed only if you redefine
+word delimiters (\fIws_delim\fR member) so that they don't contain
+whitespace characters (\fB\(dq \\t\\n\(dq\fR).
+.SS Tilde expansion
+Tilde expansion is enabled if the
+.B WRDSF_PATHEXPAND
+bit is set. It expands all words that begin with an unquoted tilde
+character (`\fB~\fR'). If tilde is followed immediately by a slash,
+it is replaced with the home directory of the current user (as
+determined by his \fBpasswd\fR entry). A tilde alone is handled the
+same way. Otherwise, the characters between the tilde and first slash
+character (or end of string, if it doesn't contain any) are treated as
+a login name, and are replaced (along with the tilde itself) with the
+home directory of that user. If there is no user with such login
+name, the word is left unchanged.
+.SS Variable expansion
+Variable expansion replaces each occurrence of
+.BI $ NAME
+or
+.BI ${ NAME }
+with the value of the variable \fINAME\fR. It is enabled if the
+flag \fBWRDSF_NOVAR\fR is not set. The caller is responsible for
+supplying the table of available variables. Two mechanisms are
+provided: environment array and a callback function.
+.PP
+Environment array is a \fBNULL\fR-terminated array of variables,
+stored in the \fIws_env\fR member. The \fBWRDSF_ENV\fR flag must be
+set in order to instruct \fBwordsplit\fR to use this array.
+.PP
+By default, elements of the \fIws_env\fR array have the form
+.IR NAME = VALUE .
+An alternative format is enabled by the
+.B WRDSF_ENV_KV
+flag. When it is set, each variable is described by two consecutive
+elements in the array:
+.IR ws_env [ n ]
+contains variable name, and
+.IR ws_env [ "n+1" ]
+contains its value.
+.PP
+More sophisticated variable tables can be implemented using
+callback function. The \fIws_getvar\fR member should be set to point
+to that function and \fBWRDSF_GETVAR\fR flag must be set. The
+function itself should be defined as
+.EX
+int getvar (char **ret, const char *var, size_t len, void *clos);
+.EE
+.PP
+The function should look up for the variable identified by the first
+\fIlen\fR bytes of the string \fIvar\fR. If such variable is found,
+the function stores a copy of its value (allocated using
+\fBmalloc\fR(3)) in the memory location pointed to by \fBret\fR, and
+returns \fBWRDSE_OK\fR. If the variable is not found, the function
+returns \fBWRDSE_UNDEF\fR. Otherwise, a non-zero error code is
+returned.
+.PP
+If \fIws_getvar\fR returns
+.BR WRDSE_USERERR ,
+it must store the pointer to the error description string in
+.BR *ret .
+In any case (whether returning \fB0\fR or \fBWRDSE_USERERR\fR), the
+data returned in \fBret\fR must be allocated using
+.BR malloc (3).
+.PP
+If both
+.I ws_env
+and
+.I ws_getvar
+are used, the variable is first looked up in
+.IR ws_env ,
+and if not found there, the
+.I ws_getvar
+function is called.
+.PP
+During variable expansion, the forms below cause
+.B wordsplit
+to test for a variable that is unset or null. Omitting the
+colon results in a test only for a variable that is unset.
+.TP
+.BI ${ variable :- word }
+.BR "Use Default Values" .
+If \fIvariable\fR is unset or null, the expansion of \fIword\fR is substituted.
+Otherwise, the value of \fIvariable\fR is substituted.
+.TP
+.BI ${ variable := word }
+.BR "Assign Default Values" .
+If \fIvariable\fR is unset or null, the expansion of \fIword\fR is
+assigned to \fIvariable\fR. The value of \fIvariable\fR is then substituted.
+.TP
+.BI ${ variable :? word }
+.BR "Display Error if Null or Unset" .
+If \fIvariable\fR is null or unset, the expansion of \fIword\fR (or a
+message to that effect if word is not present) is output using
+.IR ws_error .
+Otherwise, the value of \fIvariable\fR is substituted.
+.TP
+.BI ${ variable :+ word }
+.BR "Use Alternate Value" .
+If \fIvariable\fR is null or unset, nothing is substituted, otherwise the
+expansion of \fIword\fR is substituted.
+.SS Quote removal
+.SS Command substitution
+.SS Path expansion
+.SH WORDSPLIT_T STRUCTURE
+The data type \fBwordsplit_t\fR has three members that contain
+output data upon return from \fBwordsplit\fR or \fBwordsplit_len\fR,
+and a number of members that the caller can initialize on input in
+order to customize the function behavior. Each such member has a
+corresponding flag bit, which must be set in the \fIflags\fR argument
+in order to instruct the \fBwordsplit\fR function to use it.
+.SS OUTPUT
+.TP
+.BI size_t " ws_wordc"
+Number of words in \fIws_wordv\fR. Accessible upon successful return
+from \fBwordsplit\fR.
+.TP
+.BI "char ** " ws_wordv
+Array of resulting words. Accessible upon successful return
+from \fBwordsplit\fR.
+.TP
+.BI "int " ws_errno
+Error code, if the invocation of \fBwordsplit\fR or
+\fBwordsplit_len\fR failed. This is the same value as returned from
+the function in that case.
+.SS INPUT
+.TP
+.BI "size_t " ws_offs
+If the
+.B WRDSF_DOOFFS
+flag is set, this member specifies the number of initial elements in
+.I ws_wordv
+to fill with NULLs. These elements are not counted in the returned
+.IR ws_wordc .
+.TP
+.BI "int " ws_flags
+Contains flags passed to wordsplit on entry. Can be used as a
+read-only member when using \fBwordsplit\fR in incremental mode or
+in a loop with
+.B WRDSF_REUSE
+flag set.
+.TP
+.BI "int " ws_options
+Additional options used when
+.B WRDSF_OPTIONS
+is set.
+.TP
+.BI "const char *" ws_delim
+Word delimiters. If initialized on input, the
+.B WRDSF_DELIM
+flag must be set. Otherwise, it is initialized on entry to
+.B wordsplit
+with the string \fB\(dq \\t\\n\(dq\fR.
+.TP
+.BI "const char *" ws_comment
+A zero-terminated string of characters that begin an inline comment.
+If initialized on input, the
+.B WRDSF_COMMENT
+flag must be set. By default, its value is \fB\(dq#\(dq\fR.
+.TP
+.BI "const char *" ws_escape
+Characters to be escaped with backslash. The
+.B WRDSF_ESCAPE
+flag must be set if this member is initialized.
+.TP
+.BI "void (*" ws_alloc_die ") (wordsplit_t *)"
+This function is called when
+.B wordsplit
+is unable to allocate memory and the
+.B WRDSF_ENOMEMABRT
+flag was set. The default function prints a
+message on standard error and aborts. This member can be used
+to customize error handling. If initialized, the
+.B WRDSF_ALLOC_DIE
+flag must be set.
+.TP
+.BI "void (*" ws_error ") (const char *, ...)"
+Pointer to function used for error reporting. The invocation
+convention is the same as for
+.BR printf (3).
+The default function formats and prints the message on the standard
+error.
+
+If this member is initialized, the
+.B WRDSF_ERROR
+flag must be set.
+.TP
+.BI "void (*" ws_debug ") (const char *, ...)"
+Pointer to function used for debugging output. By default it points
+to the same function as
+.BR ws_error .
+If initialized, the
+.B WRDSF_DEBUG
+flag must be set.
+.TP
+.BI "const char **" ws_env
+A \fBNULL\fR-terminated array of environment variables. It is used
+during variable expansion. If set, the
+.B WRDSF_ENV
+flag must be set. Variable expansion is enabled only if either
+.B WRDSF_ENV
+or
+.B WRDSF_GETVAR
+(see below) is set, and
+.B WRDSF_NOVAR
+flag is not set.
+
+Each element of
+.I ws_env
+must have the form \fB\(dq\fINAME\fB=\fIVALUE\fB\(dq\fR, where \fINAME\fR is
+the name of the variable, and \fIVALUE\fR is its value.
+Alternatively, if the \fBWRDSF_ENV_KV\fR flag is set, each variable is
+described by two elements of
+.IR ws_env :
+one containing variable name, and the next one with its
+value.
+.TP
+.BI "int (*" ws_getvar ") (char **ret, const char *var, size_t len, void *clos)"
+Points to the function that will be used during variable expansion to
+look up for the value of the environment variable named \fBvar\fR.
+This function is used if the variable expansion is enabled (i.e. the
+.B WRDSF_NOVAR
+flag is not set), and the \fBWRDSF_GETVAR\fR flag is set.
+
+If both
+.B WRDSF_ENV
+and
+.B WRDSF_GETVAR
+are set, the variable is first looked up in the
+.I ws_env
+array and, if not found there,
+.I ws_getvar
+is called.
+
+The name of the variable is specified by the first \fIlen\fR bytes of
+the string \fIvar\fR. The \fIclos\fR parameter supplies the
+user-specific data (see below the description of \fIws_closure\fR
+member) and the \fBret\fR parameter points to the memory location used
+for output data. On success, the function must store there a pointer
+to the string with the value of the variable and return 0. On error,
+it must return one of the error codes described in the section
+.BR "ERROR CODES" .
+If \fIws_getvar\fR returns
+.BR WRDSE_USERERR ,
+it must store the pointer to the error description string in
+.BR *ret .
+In any case (whether returning \fB0\fR or \fBWRDSE_USERERR\fR), the
+data returned in \fBret\fR must be allocated using
+.BR malloc (3).
+.TP
+.BI "void *" ws_closure
+Additional user-specific data passed as the last argument to
+.I ws_getvar
+or
+.I ws_command
+(see below). If defined, the
+.B WRDSF_CLOSURE
+flag must be set.
+.TP
+\fBint (*\fIws_command\fB)\
+ (char **ret,\
+ const char *cmd,\
+ size_t len,\
+ char **argv,\
+ void *clos)
+Pointer to the function that performs command substitution. It treats
+the first \fIlen\fR bytes of the string \fIcmd\fR as a command
+(whatever it means for the caller) and attempts to execute it. On
+success, a pointer to the string with the command output is stored
+in the memory location pointed to by \fBret\fR and \fB0\fR is
+returned. On error,
+the function must return one of the error codes described in the section
+.BR "ERROR CODES" .
+If \fIws_command\fR returns
+.BR WRDSE_USERERR ,
+it must store the pointer to the error description string in
+.BR *ret .
+In any case (whether returning \fB0\fR or \fBWRDSE_USERERR\fR), the
+data returned in \fBret\fR must be allocated using
+.BR malloc (3).
+
+If the
+.I WRDSO_ARGV
+option is set, the parameter \fBargv\fR contains the command split into
+words using the same settings as the input \fIws\fR structure, with
+command substitution disabled.
+
+The \fIclos\fR parameter supplies user-specific data (see the
+description of \fIws_closure\fR member).
+.SH FLAGS
+The following macros are defined for use in the \fBflags\fR argument.
+.TP
+.B WRDSF_DEFFLAGS
+Default flags. This is a shortcut for:
+
+\fB(WRDSF_NOVAR |\
+ WRDSF_NOCMD |\
+ WRDSF_QUOTE |\
+ WRDSF_SQUEEZE_DELIMS |\
+ WRDSF_CESCAPES)\fR,
+
+i.e.: disable variable expansion and command substitution, perform quote
+removal, treat any number of consecutive delimiters as a single
+delimiter, replace \fBC\fR escapes appearing in the input string with
+the corresponding characters.
+.TP
+.B WRDSF_APPEND
+Append the words found to the array resulting from a previous call to
+\fBwordsplit\fR.
+.TP
+.B WRDSF_DOOFFS
+Insert
+.I ws_offs
+initial
+.BR NULL s
+in the array
+.IR ws_wordv .
+These are not counted in the returned
+.IR ws_wordc .
+.TP
+.B WRDSF_NOCMD
+Don't do command substitution.
+.TP
+.B WRDSF_REUSE
+The parameter \fIws\fR resulted from a previous call to
+\fBwordsplit\fR, and \fBwordsplit_free\fR was not called. Reuse the
+allocated storage.
+.TP
+.B WRDSF_SHOWERR
+Print errors using
+.BR ws_error .
+.TP
+.B WRDSF_UNDEF
+Consider it an error if an undefined variable is expanded.
+.TP
+.B WRDSF_NOVAR
+Don't do variable expansion.
+.TP
+.B WRDSF_ENOMEMABRT
+Abort on
+.B ENOMEM
+error. By default, out of memory errors are treated as any other
+errors: the error is reported using \fIws_error\fR if the
+.B WRDSF_SHOWERR
+flag is set, and error code is returned. If this flag is set, the
+.B ws_alloc_die
+function is called instead. This function is not supposed to return.
+.TP
+.B WRDSF_WS
+Trim off any leading and trailing whitespace from the returned
+words. This flag is useful if the \fIws_delim\fR member does not
+contain whitespace characters.
+.TP
+.B WRDSF_SQUOTE
+Handle single quotes.
+.TP
+.B WRDSF_DQUOTE
+Handle double quotes.
+.TP
+.B WRDSF_QUOTE
+A shortcut for \fB(WRDSF_SQUOTE|WRDSF_DQUOTE)\fR.
+.TP
+.B WRDSF_SQUEEZE_DELIMS
+Replace each input sequence of repeated delimiters with a single
+delimiter.
+.TP
+.B WRDSF_RETURN_DELIMS
+Return delimiters.
+.TP
+.B WRDSF_SED_EXPR
+Treat
+.BR sed (1)
+expressions as words.
+.TP
+.B WRDSF_DELIM
+.I ws_delim
+member is initialized.
+.TP
+.B WRDSF_COMMENT
+.I ws_comment
+member is initialized.
+.TP
+.B WRDSF_ALLOC_DIE
+.I ws_alloc_die
+member is initialized.
+.TP
+.B WRDSF_ERROR
+.I ws_error
+member is initialized.
+.TP
+.B WRDSF_DEBUG
+.I ws_debug
+member is initialized.
+.TP
+.B WRDSF_ENV
+.I ws_env
+member is initialized.
+.TP
+.B WRDSF_GETVAR
+.I ws_getvar
+member is initialized.
+.TP
+.B WRDSF_SHOWDBG
+Enable debugging.
+.TP
+.B WRDSF_NOSPLIT
+Don't split input into words. This flag is useful for side
+effects, e.g. to perform variable expansion within a string.
+.TP
+.B WRDSF_KEEPUNDEF
+Keep undefined variables in place, instead of expanding them to
+empty strings.
+.TP
+.B WRDSF_WARNUNDEF
+Warn about undefined variables.
+.TP
+.B WRDSF_CESCAPES
+Handle \fBC\fR-style escapes in the input string.
+.TP
+.B WRDSF_CLOSURE
+.I ws_closure
+is set.
+.TP
+.B WRDSF_ENV_KV
+Each two consecutive elements in the
+.I ws_env
+array describe a single variable:
+.IR ws_env [ n ]
+contains variable name, and
+.IR ws_env [ "n+1" ]
+contains its value.
+.TP
+.B WRDSF_ESCAPE
+.I ws_escape
+is set.
+.TP
+.B WRDSF_INCREMENTAL
+Incremental mode. Each subsequent call to \fBwordsplit\fR with
+\fBNULL\fR as its first argument parses the next word from the input.
+See the section
+.B INCREMENTAL MODE
+for a detailed discussion.
+.TP
+.B WRDSF_PATHEXPAND
+Perform pathname and tilde expansion. If this flag is set, the
+\fIws_options\fR member must also be initialized. See the
+subsection
+.B "Path expansion"
+for details.
+.TP
+.B WRDSF_OPTIONS
+The
+.I ws_options
+member is initialized.
+.SH "RETURN VALUE"
+.SH EXAMPLE
+.SH "SEE ALSO"
+.SH AUTHORS
+Sergey Poznyakoff
+.SH "BUG REPORTS"
+Report bugs to <gray+grecs@gnu.org.ua>.
+.SH COLOPHON
+The \fBGrecs\fR library is constantly changing, so this manual page
+may be incorrect or out-of-date. For the latest copy of \fBGrecs\fR
+documentation, visit <http://www.gnu.org.ua/software/grecs>.
+.SH COPYRIGHT
+Copyright \(co 2011 Sergey Poznyakoff
+.br
+.na
+License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
+.br
+.ad
+This is free software: you are free to change and redistribute it.
+There is NO WARRANTY, to the extent permitted by law.
+.\" Local variables:
+.\" eval: (add-hook 'write-file-hooks 'time-stamp)
+.\" time-stamp-start: ".TH [A-Z_][A-Z0-9_]* [0-9] \""
+.\" time-stamp-format: "%:B %:d, %:y"
+.\" time-stamp-end: "\""
+.\" time-stamp-line-limit: 20
+.\" end:
+
diff --git a/src/wordsplit.c b/src/wordsplit.c
index 4a69725..c726239 100644
--- a/src/wordsplit.c
+++ b/src/wordsplit.c
@@ -114,11 +114,30 @@ _wsplt_subsplit (struct wordsplit *wsp, struct wordsplit *wss,
wss->ws_error = wsp->ws_error;
wss->ws_alloc_die = wsp->ws_alloc_die;
+ if (!(flags & WRDSF_NOVAR))
+ {
+ wss->ws_env = wsp->ws_env;
+ wss->ws_getvar = wsp->ws_getvar;
+ flags |= wsp->ws_flags & (WRDSF_ENV | WRDSF_ENV_KV | WRDSF_GETVAR);
+ }
+ if (!(flags & WRDSF_NOCMD))
+ {
+ wss->ws_command = wsp->ws_command;
+ }
+
+ if ((flags & (WRDSF_NOVAR|WRDSF_NOCMD)) != (WRDSF_NOVAR|WRDSF_NOCMD))
+ {
+ wss->ws_closure = wsp->ws_closure;
+ flags |= wsp->ws_flags & WRDSF_CLOSURE;
+ }
+
+ wss->ws_options = wsp->ws_options;
+
flags |= WRDSF_DELIM
| WRDSF_ALLOC_DIE
| WRDSF_ERROR
| WRDSF_DEBUG
- | (wsp->ws_flags & (WRDSF_SHOWDBG | WRDSF_SHOWERR));
+ | (wsp->ws_flags & (WRDSF_SHOWDBG | WRDSF_SHOWERR | WRDSF_OPTIONS));
return wordsplit_run (str, len, wss, flags, wsp->ws_lvl + 1);
}
@@ -168,12 +187,11 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
if (!(wsp->ws_flags & WRDSF_ERROR))
wsp->ws_error = _wsplt_error;
- if (!(wsp->ws_flags & WRDSF_NOVAR)
- && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR)))
+ if (!(wsp->ws_flags & WRDSF_NOVAR))
{
- _wsplt_seterr (wsp, WRDSE_USAGE);
- errno = EINVAL;
- return wsp->ws_errno;
+ /* These will be initialized on first variable assignment */
+ wsp->ws_envidx = wsp->ws_envsiz = 0;
+ wsp->ws_envbuf = NULL;
}
if (!(wsp->ws_flags & WRDSF_NOCMD))
@@ -214,6 +232,9 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
if (!(wsp->ws_flags & WRDSF_CLOSURE))
wsp->ws_closure = NULL;
+ if (!(wsp->ws_flags & WRDSF_OPTIONS))
+ wsp->ws_options = 0;
+
wsp->ws_endp = 0;
wordsplit_init0 (wsp);
@@ -717,13 +738,14 @@ find_closing_paren (const char *str, size_t i, size_t len, size_t *poff,
return 1;
}
-static const char *
-wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len)
+static int
+wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len,
+ char const **ret)
{
size_t i;
if (!(wsp->ws_flags & WRDSF_ENV))
- return NULL;
+ return WRDSE_UNDEF;
if (wsp->ws_flags & WRDSF_ENV_KV)
{
@@ -732,14 +754,17 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len)
{
size_t elen = strlen (wsp->ws_env[i]);
if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0)
- return wsp->ws_env[i + 1];
+ {
+ *ret = wsp->ws_env[i + 1];
+ return WRDSE_OK;
+ }
/* Skip the value. Break the loop if it is NULL. */
i++;
if (wsp->ws_env[i] == NULL)
break;
}
}
- else
+ else if (wsp->ws_env)
{
/* Usual (A=B) environment. */
for (i = 0; wsp->ws_env[i]; i++)
@@ -751,10 +776,117 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len)
if (name[j] != var[j])
break;
if (j == len && var[j] == '=')
- return var + j + 1;
+ {
+ *ret = var + j + 1;
+ return WRDSE_OK;
+ }
+ }
+ }
+ return WRDSE_UNDEF;
+}
+
+/* Append the variable NAME (NAMELEN bytes long) with the given VALUE to
+   the environment of WSP.
+
+   The environment is kept in the private array ws_envbuf.  On the first
+   assignment it is created either as a copy of the caller-supplied
+   ws_env (if WRDSF_ENV is set) or as an empty array of
+   WORDSPLIT_ENV_INIT slots, in which case WRDSF_ENV is switched on.
+   ws_env is then redirected to that array.  ws_envsiz is the number of
+   allocated slots; ws_envidx is the index of the terminating NULL, i.e.
+   the slot the next variable will be stored in.
+
+   With WRDSF_ENV_KV the variable occupies two slots (name, then value);
+   otherwise a single "NAME=VALUE" string is stored.
+
+   Returns WRDSE_OK on success, or the result of _wsplt_nomem if a memory
+   allocation fails. */
+static int
+wsplt_assign_var (struct wordsplit *wsp, const char *name, size_t namelen,
+                  char *value)
+{
+  int n = (wsp->ws_flags & WRDSF_ENV_KV) ? 2 : 1;
+  char *v;
+
+  /* Make sure there is room for N new slots plus the terminating NULL. */
+  if (wsp->ws_envidx + n >= wsp->ws_envsiz)
+    {
+      size_t sz;
+      char **newenv;
+
+      if (!wsp->ws_envbuf)
+        {
+          if (wsp->ws_flags & WRDSF_ENV)
+            {
+              size_t i = 0, j;
+
+              /* Count the entries of the caller-supplied environment. */
+              if (wsp->ws_env)
+                {
+                  for (; wsp->ws_env[i]; i++)
+                    ;
+                }
+
+              sz = i + n + 1;
+
+              newenv = calloc (sz, sizeof (newenv[0]));
+              if (!newenv)
+                return _wsplt_nomem (wsp);
+
+              for (j = 0; j < i; j++)
+                {
+                  newenv[j] = strdup (wsp->ws_env[j]);
+                  if (!newenv[j])
+                    {
+                      /* Release the copies made so far (possibly none)
+                         and the array itself. */
+                      while (j > 0)
+                        free (newenv[--j]);
+                      free (newenv);
+                      return _wsplt_nomem (wsp);
+                    }
+                }
+              newenv[j] = NULL;
+
+              wsp->ws_envbuf = newenv;
+              wsp->ws_envidx = i;
+              wsp->ws_envsiz = sz;
+              wsp->ws_env = (const char**) wsp->ws_envbuf;
+            }
+          else
+            {
+              newenv = calloc (WORDSPLIT_ENV_INIT, sizeof (newenv[0]));
+              if (!newenv)
+                return _wsplt_nomem (wsp);
+              wsp->ws_envbuf = newenv;
+              wsp->ws_envidx = 0;
+              wsp->ws_envsiz = WORDSPLIT_ENV_INIT;
+              wsp->ws_env = (const char**) wsp->ws_envbuf;
+              wsp->ws_flags |= WRDSF_ENV;
+            }
+        }
+      else
+        {
+          /* Commit the new size only once realloc has succeeded, so that
+             ws_envsiz never overstates the real allocation. */
+          sz = wsp->ws_envsiz * 2;
+          newenv = realloc (wsp->ws_envbuf, sz * sizeof (wsp->ws_envbuf[0]));
+          if (!newenv)
+            return _wsplt_nomem (wsp);
+          wsp->ws_envbuf = newenv;
+          wsp->ws_envsiz = sz;
+          wsp->ws_env = (const char**) wsp->ws_envbuf;
         }
     }
-  return NULL;
+
+  if (wsp->ws_flags & WRDSF_ENV_KV)
+    {
+      /* A key-value pair environment */
+      char *p = malloc (namelen + 1);
+      if (!p)
+        return _wsplt_nomem (wsp);
+      memcpy (p, name, namelen);
+      p[namelen] = 0;
+
+      v = strdup (value);
+      if (!v)
+        {
+          free (p);
+          return _wsplt_nomem (wsp);
+        }
+      wsp->ws_env[wsp->ws_envidx++] = p;
+      wsp->ws_env[wsp->ws_envidx++] = v;
+    }
+  else
+    {
+      v = malloc (namelen + strlen (value) + 2);
+      if (!v)
+        return _wsplt_nomem (wsp);
+      memcpy (v, name, namelen);
+      v[namelen++] = '=';
+      strcpy (v + namelen, value);
+      wsp->ws_env[wsp->ws_envidx++] = v;
+    }
+  /* Terminate the array without advancing ws_envidx: the next assignment
+     must overwrite this NULL, otherwise it would be stored past the
+     terminator and never be found by lookups. */
+  wsp->ws_env[wsp->ws_envidx] = NULL;
+  return WRDSE_OK;
 }
static int
@@ -767,7 +899,9 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
const char *vptr;
struct wordsplit_node *newnode;
const char *start = str - 1;
-
+ int rc;
+ struct wordsplit ws;
+
if (ISVARBEG (str[0]))
{
for (i = 1; i < len; i++)
@@ -780,22 +914,35 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
str++;
len--;
for (i = 1; i < len; i++)
- if (str[i] == '}' || str[i] == ':')
- break;
- if (str[i] == ':')
- {
- size_t j;
-
- defstr = str + i + 1;
- if (find_closing_paren (str, i + 1, len, &j, "{}"))
- return _wsplt_seterr (wsp, WRDSE_CBRACE);
- *pend = str + j;
- }
- else if (str[i] == '}')
{
- *pend = str + i;
+ if (str[i] == ':')
+ {
+ size_t j;
+
+ defstr = str + i + 1;
+ if (find_closing_paren (str, i + 1, len, &j, "{}"))
+ return _wsplt_seterr (wsp, WRDSE_CBRACE);
+ *pend = str + j;
+ break;
+ }
+ else if (str[i] == '}')
+ {
+ defstr = NULL;
+ *pend = str + i;
+ break;
+ }
+ else if (strchr ("-+?=", str[i]))
+ {
+ size_t j;
+
+ defstr = str + i;
+ if (find_closing_paren (str, i, len, &j, "{}"))
+ return _wsplt_seterr (wsp, WRDSE_CBRACE);
+ *pend = str + j;
+ break;
+ }
}
- else
+ if (i == len)
return _wsplt_seterr (wsp, WRDSE_CBRACE);
}
else
@@ -820,64 +967,99 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
i - its length
defstr - default replacement str */
- vptr = wordsplit_find_env (wsp, str, i);
- if (vptr)
+ if (defstr && strchr("-+?=", defstr[0]) == 0)
{
- value = strdup (vptr);
- if (!value)
- return _wsplt_nomem (wsp);
+ rc = WRDSE_UNDEF;
+ defstr = NULL;
}
- else if (wsp->ws_flags & WRDSF_GETVAR)
+ else
{
- int rc = wsp->ws_getvar (&value, str, i, wsp->ws_closure);
- switch (rc)
+ rc = wordsplit_find_env (wsp, str, i, &vptr);
+ if (rc == WRDSE_OK)
{
- case WRDSE_OK:
- break;
-
- case WRDSE_NOSPACE:
- return _wsplt_nomem (wsp);
-
- case WRDSE_UNDEF:
- value = NULL;
- break;
+ value = strdup (vptr);
+ if (!value)
+ rc = WRDSE_NOSPACE;
+ }
+ else if (wsp->ws_flags & WRDSF_GETVAR)
+ rc = wsp->ws_getvar (&value, str, i, wsp->ws_closure);
+ else
+ rc = WRDSE_UNDEF;
- case WRDSE_USERERR:
- if (wsp->ws_errno == WRDSE_USERERR)
- free (wsp->ws_usererr);
- wsp->ws_usererr = value;
- /* fall through */
- default:
- _wsplt_seterr (wsp, rc);
- return 1;
+ if (rc == WRDSE_OK && value[0] == 0 && defstr && defstr[-1] == ':')
+ {
+ free (value);
+ rc = WRDSE_UNDEF;
}
}
- else
- value = NULL;
- if (!value)
+ switch (rc)
{
+ case WRDSE_OK:
+ if (defstr && *defstr == '+')
+ {
+ size_t size = *pend - ++defstr;
+
+ rc = _wsplt_subsplit (wsp, &ws, defstr, size,
+ WRDSF_NOSPLIT | WRDSF_WS | WRDSF_QUOTE |
+ (wsp->ws_flags &
+ (WRDSF_NOVAR | WRDSF_NOCMD)));
+ if (rc)
+ return rc;
+ free (value);
+ value = ws.ws_wordv[0];
+ ws.ws_wordv[0] = NULL;
+ wordsplit_free (&ws);
+ }
+ break;
+
+ case WRDSE_UNDEF:
if (defstr)
{
size_t size;
- if (*defstr == '-')
+ if (*defstr == '-' || *defstr == '=')
{
size = *pend - ++defstr;
- value = malloc (size + 1);
- if (!value)
- return _wsplt_nomem (wsp);
- memcpy (value, defstr, size);
- value[size] = 0;
+
+ rc = _wsplt_subsplit (wsp, &ws, defstr, size,
+ WRDSF_NOSPLIT | WRDSF_WS | WRDSF_QUOTE |
+ (wsp->ws_flags &
+ (WRDSF_NOVAR | WRDSF_NOCMD)));
+ if (rc)
+ return rc;
+
+ value = ws.ws_wordv[0];
+ ws.ws_wordv[0] = NULL;
+ wordsplit_free (&ws);
+
+ if (defstr[-1] == '=')
+ wsplt_assign_var (wsp, str, i, value);
}
- else if (*defstr == '?')
+ else
{
- size = *pend - ++defstr;
- if (size == 0)
- wsp->ws_error (_("%.*s: variable null or not set"),
- (int) i, str);
- else
- wsp->ws_error ("%.*s: %.*s",
- (int) i, str, (int) size, defstr);
+ if (*defstr == '?')
+ {
+ size = *pend - ++defstr;
+ if (size == 0)
+ wsp->ws_error (_("%.*s: variable null or not set"),
+ (int) i, str);
+ else
+ {
+ rc = _wsplt_subsplit (wsp, &ws, defstr, size,
+ WRDSF_NOSPLIT | WRDSF_WS |
+ WRDSF_QUOTE |
+ (wsp->ws_flags &
+ (WRDSF_NOVAR | WRDSF_NOCMD)));
+ if (rc == 0)
+ wsp->ws_error ("%.*s: %s",
+ (int) i, str, ws.ws_wordv[0]);
+ else
+ wsp->ws_error (_("%.*s: %.*s"),
+ (int) i, str, (int) size, defstr);
+ wordsplit_free (&ws);
+ }
+ }
+ value = NULL;
}
}
else if (wsp->ws_flags & WRDSF_UNDEF)
@@ -899,18 +1081,21 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
return _wsplt_nomem (wsp);
}
}
+ break;
+
+ case WRDSE_NOSPACE:
+ return _wsplt_nomem (wsp);
+
+ case WRDSE_USERERR:
+ if (wsp->ws_errno == WRDSE_USERERR)
+ free (wsp->ws_usererr);
+ wsp->ws_usererr = value;
+ /* fall through */
+ default:
+ _wsplt_seterr (wsp, rc);
+ return 1;
}
- else if (defstr && *defstr == '+')
- {
- size_t size = *pend - ++defstr;
- free (value);
- value = malloc (size + 1);
- if (!value)
- return _wsplt_nomem (wsp);
- memcpy (value, defstr, size);
- value[size] = 0;
- }
-
+
if (value)
{
if (flg & _WSNF_QUOTE)
@@ -1111,7 +1296,7 @@ expcmd (struct wordsplit *wsp, const char *str, size_t len,
}
*pend = str + j;
- if (wsp->ws_flags & WRDSF_ARGV)
+ if (wsp->ws_options & WRDSO_ARGV)
{
struct wordsplit ws;
@@ -2034,6 +2219,23 @@ wordsplit_free_words (struct wordsplit *ws)
}
+/* Free the private environment buffer created by variable assignments
+   and reset its bookkeeping members.  wordsplit_init initializes
+   ws_envbuf only when variable expansion is enabled (WRDSF_NOVAR not
+   set), so that is the flag that decides whether there is anything to
+   release here. */
 void
+wordsplit_free_envbuf (struct wordsplit *ws)
+{
+  if (ws->ws_flags & WRDSF_NOVAR)
+    return;
+  if (ws->ws_envbuf)
+    {
+      size_t i;
+
+      /* The buffer is NULL-terminated; every entry is heap-allocated. */
+      for (i = 0; ws->ws_envbuf[i]; i++)
+        free (ws->ws_envbuf[i]);
+      free (ws->ws_envbuf);
+      ws->ws_envidx = ws->ws_envsiz = 0;
+      ws->ws_envbuf = NULL;
+    }
+}
+
+void
wordsplit_clearerr (struct wordsplit *ws)
{
if (ws->ws_errno == WRDSE_USERERR)
@@ -2048,6 +2250,7 @@ wordsplit_free (struct wordsplit *ws)
wordsplit_free_words (ws);
free (ws->ws_wordv);
ws->ws_wordv = NULL;
+ wordsplit_free_envbuf (ws);
}
const char *_wordsplit_errstr[] = {
diff --git a/src/wordsplit.h b/src/wordsplit.h
index 6a78c48..3c1d533 100644
--- a/src/wordsplit.h
+++ b/src/wordsplit.h
@@ -19,6 +19,8 @@
#include <stddef.h>
+typedef struct wordsplit wordsplit_t;
+
/* Structure used to direct the splitting. Members marked with [Input]
can be defined before calling wordsplit(), those marked with [Output]
provide return values when the function returns. If neither mark is
@@ -43,7 +45,7 @@ struct wordsplit
const char *ws_comment; /* [Input] (WRDSF_COMMENT) Comment characters. */
const char *ws_escape; /* [Input] (WRDSF_ESCAPE) Characters to be escaped
with backslash. */
- void (*ws_alloc_die) (struct wordsplit *wsp);
+ void (*ws_alloc_die) (wordsplit_t *wsp);
/* [Input] (WRDSF_ALLOC_DIE) Function called when
out of memory. Must not return. */
void (*ws_error) (const char *, ...)
@@ -56,6 +58,11 @@ struct wordsplit
output. */
const char **ws_env; /* [Input] (WRDSF_ENV, !WRDSF_NOVAR) Array of
environment variables. */
+
+