diff options
Diffstat (limited to 'doc/wordsplit.3')
-rw-r--r-- | doc/wordsplit.3 | 1036 |
1 files changed, 0 insertions, 1036 deletions
diff --git a/doc/wordsplit.3 b/doc/wordsplit.3 deleted file mode 100644 index 1a6f80b..0000000 --- a/doc/wordsplit.3 +++ /dev/null @@ -1,1036 +0,0 @@ -.\" This file is part of grecs -*- nroff -*- -.\" Copyright (C) 2007-2018 Sergey Poznyakoff -.\" -.\" Grecs is free software; you can redistribute it and/or modify -.\" it under the terms of the GNU General Public License as published by -.\" the Free Software Foundation; either version 3, or (at your option) -.\" any later version. -.\" -.\" Grecs is distributed in the hope that it will be useful, -.\" but WITHOUT ANY WARRANTY; without even the implied warranty of -.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -.\" GNU General Public License for more details. -.\" -.\" You should have received a copy of the GNU General Public License -.\" along with Grecs. If not, see <http://www.gnu.org/licenses/>. -.\" -.TH WORDSPLIT 3 "May 22, 2018" "GRECS" "Grecs User Reference" -.SH NAME -wordsplit \- split string into words -.SH SYNOPSIS -.B #include <wordsplit.h> -.sp -\fBint wordsplit (const char *\fIs\fB,\ - wordsplit_t *\fIws\fB, int \fIflags\fB);\fR -.sp -\fBint wordsplit_len (const char *\fIs\fB,\ - \fBsize_t \fIlen\fR,\ - \fBwordsplit_t *\fIp\fB,\ - int \fIflags\fB); -.sp -\fBvoid wordsplit_free (wordsplit_t *\fIp\fB);\fR -.sp -\fBvoid wordsplit_free_words (wordsplit_t *\fIws\fB);\fR -.sp -\fBvoid wordsplit_getwords (wordsplit_t *\fIws\fB,\ - int *\fIwordc\fB, char ***\fIwordv\fB); -.sp -\fBvoid wordsplit_perror (wordsplit_t *\fIws\fB);\fR -.sp -\fBconst char *wordsplit_strerror (wordsplit_t *\fIws\fB);\fR -.sp -\fBvoid wordsplit_clearerr (wordsplit_t *\fIws\fB);\fR -.SH DESCRIPTION -The function \fBwordsplit\fR splits the string \fIs\fR into words -using a set of rules governed by \fIflags\fR. Depending on -\fIflags\fR, the function performs the following operations: -whitespace trimming, tilde expansion, variable expansion, quote -removal, command substitution, and path expansion. 
On success, -\fBwordsplit\fR returns 0 and stores the words found in the member -\fBws_wordv\fR and the number of words in the member \fBws_wordc\fR. -On error, a non-zero error code is returned. -.PP -The function \fBwordsplit_len\fR acts similarly, except that it -accesses only first \fBlen\fR bytes of the string \fIs\fR, which is -not required to be null-terminated. -.PP -When no longer needed, the resources allocated by a call to one of -these functions must be freed using -.BR wordsplit_free . -.PP -The function -.B wordsplit_free_words -frees only the memory allocated for elements of -.I ws_wordv -and initializes -.I ws_wordc -to zero. -.PP -The usual calling sequence is: -.PP -.EX -wordsplit_t ws; -int rc; - -if (wordsplit(s, &ws, WRDSF_DEFFLAGS)) { - wordsplit_perror(&ws); - return; -} -for (i = 0; i < ws.ws_wordc; i++) { - /* do something with ws.ws_wordv[i] */ -} -wordsplit_free(&ws); -.EE -.PP -The function -.B wordsplit_getwords -returns in \fIwordv\fR an array of words, and in \fIwordc\fR the number -of elements in \fIwordv\fR. The array can be used after calling -.BR wordsplit_free . -The caller becomes responsible for freeing the memory allocated for -each element of the array and the array pointer itself. -.PP -The function -.B wordsplit_perror -prints error message from the last invocation of \fBwordsplit\fR. It -uses the function pointed to by the -.I ws_error -member. By default, it outputs the message on the standard error. -.PP -For more sophisticated error reporting, the function -.B wordsplit_strerror -can be used. It returns a pointer to the string describing the error. -The caller should treat this pointer as a constant string. It should -not try to alter or deallocate it. -.PP -The function -.B wordsplit_clearerr -clears the error condition associated with \fIws\fR. -.SH INCREMENTAL MODE -In incremental mode \fBwordsplit\fR parses one word per invocation. 
-It returns \fBWRDSE_OK\fR on success and \fBWRDSE_NOINPUT\fR when -entire input string has been processed. -.PP -This mode is enabled if the flag \fBWRDSF_INCREMENTAL\fR is set in -the \fIflags\fR argument. Subsequent calls to \fBwordsplit\fR must -have \fBNULL\fR as first argument. Each successful -call will return exactly one word in \fBws.ws_wordv[0]\fR. -.PP -An example usage: -.PP -.EX -wordsplit_t ws; -int rc; -flags = WRDSF_DEFFLAGS|WRDSF_INCREMENTAL; - -for (rc = wordsplit(s, &ws, flags); rc == WRDSE_OK; - rc = wordsplit(NULL, &ws, flags)) { - process(ws.ws_wordv[0]); -} - -if (rc != WRDSE_NOINPUT) - wordsplit_perror(&ws); - -wordsplit_free(&ws); -.EE -.SH OPTIONS -The number of flags is limited to 32 (the width of \fBuint32_t\fR data -type) and each bit is occupied by a corresponding flag. However, the -number of features \fBwordsplit\fR provides required still -more. Additional features can be requested by setting a corresponding -\fIoption bit\fR in the \fBws_options\fR field of the \fBstruct -wordsplit\fR argument. To inform wordsplit functions that this field -is initialized the \fBWRDSF_OPTIONS\fR flag must be set. -.PP -Option symbolic names begin with \fBWRDSO_\fR. They are discussed in -detail in the subsequent chapters. -.SH EXPANSION -Expansion is performed on the input after it has been split into -words. There are several kinds of expansion, which of them are -performed is controlled by appropriate bits set in the \fIflags\fR -argument. Whatever expansion kinds are enabled, they are always run -in the same order as described in this section. -.SS Whitespace trimming -Whitespace trimming removes any leading and trailing whitespace from -the initial word array. It is enabled by the -.B WRDSF_WS -flag. Whitespace trimming is needed only if you redefine -word delimiters (\fIws_delim\fR member) so that they don't contain -whitespace characters (\fB\(dq \\t\\n\(dq\fR).
-.SS Tilde expansion -Tilde expansion is enabled if the -.B WRDSF_PATHEXPAND -bit is set. It expands all words that begin with an unquoted tilde -character (`\fB~\fR'). If tilde is followed immediately by a slash, -it is replaced with the home directory of the current user (as -determined by his \fBpasswd\fR entry). A tilde alone is handled the -same way. Otherwise, the characters between the tilde and first slash -character (or end of string, if it doesn't contain any) are treated as -a login name, and are replaced (along with the tilde itself) with the -home directory of that user. If there is no user with such login -name, the word is left unchanged. -.SS Variable expansion -Variable expansion replaces each occurrence of -.BI $ NAME -or -.BI ${ NAME } -with the value of the variable \fINAME\fR. It is enabled if the -flag \fBWRDSF_NOVAR\fR is not set. The caller is responsible for -supplying the table of available variables. Two mechanisms are -provided: environment array and a callback function. -.PP -Environment array is a \fBNULL\fR-terminated array of variables, -stored in the \fIws_env\fR member. The \fBWRDSF_ENV\fR flag must be -set in order to instruct \fBwordsplit\fR to use this array. -.PP -By default, elements of the \fIws_env\fR array have the form -.IR NAME = VALUE . -An alternative format is enabled by the -.B WRDSF_ENV_KV -flag. When it is set, each variable is described by two consecutive -elements in the array: -.IR ws_env [ n ] -containing the variable name, and -.IR ws_env [ "n+1" ] -containing its value. If the latter is \fBNULL\fR, the corresponding -variable is undefined. -.PP -More sophisticated variable tables can be implemented using -callback function. The \fIws_getvar\fR member should be set to point -to that function and \fBWRDSF_GETVAR\fR flag must be set.
The -function itself shall be defined as -.PP -.EX -int getvar (char **ret, const char *var, size_t len, void *clos); -.EE -.PP -The function shall look up for the variable identified by the first -\fIlen\fR bytes of the string \fIvar\fR. If such variable is found, -the function shall store a copy of its value (allocated using -\fBmalloc\fR(3)) in the memory location pointed to by \fBret\fR, and -return \fBWRDSE_OK\fR. If the variable is not found, the function shall -return \fBWRDSE_UNDEF\fR. Otherwise, a non-zero error code shall be -returned. -.PP -If \fIws_getvar\fR returns -.BR WRDSE_USERERR , -it must store the pointer to the error description string in -.BR *ret . -In any case (whether returning \fB0\fR or \fBWRDSE_USERERR\fR) , the -data returned in \fBret\fR must be allocated using -.BR malloc (3). -.PP -If both -.I ws_env -and -.I ws_getvar -are used, the variable is first looked up in -.IR ws_env , -and if not found there, the -.I ws_getvar -function is called. -.PP -During variable expansion, the forms below cause -.B wordsplit -to test for a variable that is unset or null. Omitting the -colon results in a test only for a variable that is unset. -.TP -.BI ${ variable :- word } -.BR "Use Default Values" . -If \fIvariable\fR is unset or null, the expansion of \fIword\fR is substituted. -Otherwise, the value of \fIvariable\fR is substituted. -.TP -.BI ${ variable := word } -.BR "Assign Default Values" . -If \fIvariable\fR is unset or null, the expansion of \fIword\fR is -assigned to \fIvariable\fR. The value of \fIvariable\fR is then substituted. -.TP -.BI ${ variable :? word } -.BR "Display Error if Null or Unset" . -If \fIvariable\fR is null or unset, the expansion of \fIword\fR (or a -message to that effect if word is not present) is output using -.IR ws_error . -Otherwise, the value of \fIvariable\fR is substituted. -.TP -.BI ${ variable :+ word } -.BR "Use Alternate Value" . 
-If \fIvariable\fR is null or unset, nothing is substituted, otherwise the -expansion of \fIword\fR is substituted. -.SS Quote removal -Quote removal translates unquoted escape sequences into corresponding bytes. -An escape sequence is a backslash followed by one or more characters. By -default, each sequence \fB\\\fIC\fR appearing in unquoted words is -replaced with the character \fIC\fR. In doubly-quoted strings, two -backslash sequences are recognized: \fB\\\\\fR translates to a single -backslash, and \fB\\\(dq\fR translates to a double-quote. -.PP -Two flags are provided to modify this behavior. If -.I WRDSF_CESCAPES -flag is set, the following escape sequences are recognized: -.sp -.nf -.ta 8n 18n 42n -.ul - Sequence Expansion ASCII - \fB\\\\\fR \fB\\\fR 134 - \fB\\\(dq\fR \fB\(dq\fR 042 - \fB\\a\fR audible bell 007 - \fB\\b\fR backspace 010 - \fB\\f\fR form-feed 014 - \fB\\n\fR new line 012 - \fB\\r\fR carriage return 015 - \fB\\t\fR horizontal tabulation 011 - \fB\\v\fR vertical tabulation 013 -.fi -.sp -The sequence \fB\\x\fINN\fR or \fB\\X\fINN\fR, where \fINN\fR stands -for a two-digit hex number is replaced with ASCII character \fINN\fR. -The sequence \fB\\0\fINNN\fR, where \fINNN\fR stands for a three-digit -octal number is replaced with ASCII character whose code is \fINNN\fR. -.PP -The \fBWRDSF_ESCAPE\fR flag allows the caller to customize escape -sequences. If it is set, the \fBws_escape\fR member must be -initialized. This member provides escape tables for unquoted words -(\fBws_escape[0]\fR) and quoted strings (\fBws_escape[1]\fR). Each -table is a string consisting of an even number of characters. In each -pair of characters, the first one is a character that can appear after -backslash, and the following one is its translation. For example, the -above table of C escapes is represented as -\fB\(dqa\\ab\\bf\\fn\\nr\\rt\\tv\\v\(dq\fR. -.PP -It is valid to initialize \fBws_escape\fR elements to zero. In this -case, no backslash translation occurs.
-.PP -The handling of octal and hex escapes is controlled by the following -bits in \fBws_options\fR: -.TP -.B WRDSO_BSKEEP_WORD -When an unrecognized escape sequence is encountered in a word, -preserve it on output. If that bit is not set, the backslash is -removed from such sequences. -.TP -.B WRDSO_OESC_WORD -Handle octal escapes in words. -.TP -.B WRDSO_XESC_WORD -Handle hex escapes in words. -.TP -.B WRDSO_BSKEEP_QUOTE -When an unrecognized escape sequence is encountered in a doubly-quoted -string, preserve it on output. If that bit is not set, the backslash is -removed from such sequences. -.TP -.B WRDSO_OESC_QUOTE -Handle octal escapes in doubly-quoted strings. -.TP -.B WRDSO_XESC_QUOTE -Handle hex escapes in doubly-quoted strings. -.SS Command substitution -During \fIcommand substitution\fR, each word is scanned for commands. -Each command found is executed and replaced by the output it creates. -.PP -The syntax is: -.PP -.RS +4 -.BI $( command ) -.RE -.PP -Command substitutions may be nested. -.PP -Unless the substitution appears within double quotes, word splitting and -pathname expansion are performed on its result. -.PP -To enable command substitution, the caller must initialize the -.I ws_command -member with the address of the substitution function and make sure the -.B WRDSF_NOCMD -flag is not set. -.PP -The substitution function should be defined as follows: -.PP -.RS +4 -\fBint \fIcommand\fB\ - (char **\fIret\fB,\ - const char *\fIcmd\fB,\ - size_t \fIlen,\fB\ - char **\fIargv\fB,\ - void *\fIclos\fB);\fR -.RE -.PP -First \fIlen\fR bytes of \fIcmd\fR contain the command invocation as -it appeared between -.BR $( and ), -with all expansions performed. -.PP -The \fIargv\fR parameter contains the command -line split into words using the same settings as the input \fIws\fR structure. -.PP -The \fIclos\fR parameter supplies user-specific data, passed in the -\fIws_closure\fR member). 
-.PP -On success, the function stores a pointer to the -output string in the memory location pointed to by \fIret\fR and -returns \fBWRDSE_OK\fR (\fB0\fR). On error, it must return one of the -error codes described in the section -.BR "ERROR CODES" . -If -.BR WRDSE_USERERR , -is returned, a pointer to the error description string must be stored in -.BR *ret . -.PP -When \fBWRDSE_OK\fR or \fBWRDSE_USERERR\fR is returned, the -data stored in \fB*ret\fR must be allocated using -.BR malloc (3). -.SS Pathname expansion -Pathname expansion is performed if the \fBWRDSF_PATHEXPAND\fR flag is -set. Each unquoted word is scanned for characters -.BR * , ? ", and " [ . -If one of these appears, the word is considered a \fIpattern\fR (in -the sense of -.BR glob (3)) -and is replaced with an alphabetically sorted list of file names matching the -pattern. -.PP -If no matches are found for a word -and the \fIws_options\fR member has the -.B WRDSO_NULLGLOB -bit set, the word is removed. -.PP -If the \fBWRDSO_FAILGLOB\fR option is set, an error message is output -for each such word using -.IR ws_error . -.PP -When matching a pattern, the dot at the start of a name or immediately -following a slash must be matched explicitly, unless -the \fBWRDSO_DOTGLOB\fR option is set. -.SH LIMITING THE NUMBER OF WORDS -The maximum number of words to be returned can be limited by setting -the \fBws_maxwords\fR member to the desired count, and setting the -\fBWRDSO_MAXWORDS\fR option, e.g.: -.sp -.EX -struct wordsplit ws; -ws.ws_maxwords = 3; -ws.ws_options = WRDSO_MAXWORDS; -wordsplit(str, &ws, WRDSF_DEFFLAGS|WRDSF_OPTIONS); -.EE -.PP -If the actual number of words in the expanded input is greater than -the supplied limit, the trailing part of the input will be returned in -the last word. 
For example, if the input to the above fragment were -\fBNow is the time for all good men\fR, then the returned words would be: -.sp -.EX -"Now" -"is" -"the time for all good men" -.EE -.SH WORDSPLIT_T STRUCTURE -The data type \fBwordsplit_t\fR has three members that contain -output data upon return from \fBwordsplit\fR or \fBwordsplit_len\fR, -and a number of members that the caller can initialize on input in -order to customize the function behavior. Each its member has a -corresponding flag bit, which must be set in the \fIflags\fR argument -in order to instruct the \fBwordsplit\fR function to use it. -.SS OUTPUT -.TP -.BI size_t " ws_wordc" -Number of words in \fIws_wordv\fR. Accessible upon successful return -from \fBwordsplit\fR. -.TP -.BI "char ** " ws_wordv -Array of resulting words. Accessible upon successful return -from \fBwordsplit\fR. -.TP -.BI "size_t " ws_wordi -Total number of words processed. This field is intended for use with -.B WRDSF_INCREMENTAL -flag. If that flag is not set, the following relation holds: -.BR "ws_wordi == ws_wordc - ws_offs" . -.TP -.BI "int " ws_errno -Error code, if the invocation of \fBwordsplit\fR or -\fBwordsplit_len\fR failed. This is the same value as returned from -the function in that case. -.PP -The caller should not attempt to free or reallocate \fIws_wordv\fR or -any elements thereof, nor to modify \fIws_wordc\fR. -.PP -To store away the words for use after freeing \fIws\fR with -.BR wordsplit_free , -the caller should use -.BR wordsplit_getwords . -It is more effective than copying the contents of -.I ws_wordv -manually. -.SS INPUT -.TP -.BI "size_t " ws_offs -If the -.B WRDSF_DOOFFS -flag is set, this member specifies the number of initial elements in -.I ws_wordv -to fill with NULLs. These elements are not counted in the returned -.IR ws_wordc . -.TP -.BI "size_t " ws_maxwords -Maximum number of words to return. 
For this field to take effect, the -\fBWRDSO_MAXWORDS\fR option and \fBWRDSF_OPTIONS\fR flag must be set. -For a detailed discussion, see the chapter -.BR "LIMITING THE NUMBER OF WORDS" . -.TP -.BI "int " ws_flags -Contains flags passed to wordsplit on entry. Can be used as a -read-only member when using \fBwordsplit\fR in incremental mode or -in a loop with -.B WRDSF_REUSE -flag set. -.TP -.BI "int " ws_options -Additional options used when -.B WRDSF_OPTIONS -is set. -.TP -.BI "const char *" ws_delim -Word delimiters. If initialized on input, the -.B WRDSF_DELIM -flag must be set. Otherwise, it is initialized on entry to -.B wordsplit -with the string \fB\(dq \\t\\n\(dq\fR. -.TP -.BI "const char *" ws_comment -A zero-terminated string of characters that begin an inline comment. -If initialized on input, the -.B WRDSF_COMMENT -flag must be set. By default, its value is \fB\(dq#\(dq\fR. -.TP -.BI "const char *" ws_escape [2] -Escape tables for unquoted words (\fBws_escape[0]\fR) and quoted -strings (\fBws_escape[1]\fR). These are used to translate escape -sequences (\fB\\\fIC\fR) into characters. Each table is a string -consisting of an even number of characters. In each pair of characters, -the first one is a character that can appear after backslash, and the -following one is its representation. For example, the string -\fB\(dqt\\tn\\n\(dq\fR translates \fB\\t\fR into horizontal -tabulation character and \fB\\n\fR into newline. -.B WRDSF_ESCAPE -flag must be set if this member is initialized. -.TP -.BI "void (*" ws_alloc_die ") (wordsplit_t *)" -This function is called when -.B wordsplit -is unable to allocate memory and the -.B WRDSF_ENOMEMABRT -flag was set. The default function prints a -message on standard error and aborts. This member can be used -to customize error handling. If initialized, the -.B WRDSF_ALLOC_DIE -flag must be set. -.TP -.BI "void (*" ws_error ") (const char *, ...)" -Pointer to function used for error reporting.
The invocation -convention is the same as for -.BR printf (3). -The default function formats and prints the message on the standard -error. - -If this member is initialized, the -.B WRDSF_ERROR -flag must be set. -.TP -.BI "void (*" ws_debug ") (const char *, ...)" -Pointer to function used for debugging output. By default it points -to the same function as -.BR ws_error . -If initialized, the -.B WRDSF_DEBUG -flag must be set. -.TP -.BR "const char **" ws_env -A \fBNULL\fR-terminated array of environment variables. It is used -during variable expansion. If set, the -.B WRDSF_ENV -flag must be set. Variable expansion is enabled only if either -.B WRDSF_ENV -or -.B WRDSF_GETVAR -(see below) is set, and -.B WRDSF_NOVAR -flag is not set. - -Each element of -.I ws_env -must have the form \fB\(dq\fINAME\fB=\fIVALUE\fR, where \fINAME\fR is -the name of the variable, and \fIVALUE\fR is its value. -Alternatively, if the \fBWRDSF_ENV_KV\fR flag is set, each variable is -described by two elements of -.IR ws_env : -one containing variable name, and the next one with its -value. -.TP -.BI "int (*" ws_getvar ") (char **ret, const char *var, size_t len, void *clos)" -Points to the function that will be used during variable expansion to -look up for the value of the environment variable named \fBvar\fR. -This function is used if the variable expansion is enabled (i.e. the -.B WRDSF_NOVAR -flag is not set), and the \fBWRDSF_GETVAR\fR flag is set. - -If both -.B WRDSF_ENV -and -.B WRDSF_GETVAR -are set, the variable is first looked up in the -.I ws_env -array and, if not found there, -.I ws_getvar -is called. - -The name of the variable is specified by the first \fIlen\fR bytes of -the string \fIvar\fR. The \fIclos\fR parameter supplies the -user-specific data (see below the description of \fIws_closure\fR -member) and the \fBret\fR parameter points to the memory location -where output data is to be stored. 
On success, the function must -store there a pointer to the string with the value of the variable and -return 0. On error, it must return one of the error codes described -in the section -.BR "ERROR CODES" . -If \fIws_getvar\fR returns -.BR WRDSE_USERERR , -it must store the pointer to the error description string in -.BR *ret . -In any case (whether returning \fB0\fR or \fBWRDSE_USERERR\fR), the -data returned in \fBret\fR must be allocated using -.BR malloc (3). -.TP -.BI "void *" ws_closure -Additional user-specific data passed as the last argument to -.I ws_getvar -or -.I ws_command -(see below). If defined, the -.B WRDSF_CLOSURE -flag must be set. -.TP -\fBint (*\fIws_command\fB)\ - (char **ret,\ - const char *cmd,\ - size_t len,\ - char **argv,\ - void *clos)\fR -Pointer to the function that performs command substitution. It treats -the first \fIlen\fR bytes of the string \fIcmd\fR as a command -(whatever it means for the caller) and attempts to execute it. On -success, a pointer to the string with the command output is stored -in the memory location pointed to by \fBret\fR and \fB0\fR is -returned. On error, -the function must return one of the error codes described in the section -.BR "ERROR CODES" . -If \fIws_command\fR returns -.BR WRDSE_USERERR , -it must store the pointer to the error description string in -.BR *ret . -In any case (whether returning \fB0\fR or \fBWRDSE_USERERR\fR), the -data returned in \fBret\fR must be allocated using -.BR malloc (3). - -The parameter \fBargv\fR contains the command split into -words using the same settings as the input \fIws\fR structure, with -command substitution disabled. - -The \fIclos\fR parameter supplies user-specific data (see the -description of \fIws_closure\fR member). -.SH FLAGS -The following macros are defined for use in the \fBflags\fR argument. -.TP -.B WRDSF_DEFFLAGS -Default flags.
This is a shortcut for: - -\fB(WRDSF_NOVAR |\ - WRDSF_NOCMD |\ - WRDSF_QUOTE |\ - WRDSF_SQUEEZE_DELIMS |\ - WRDSF_CESCAPES)\fR, - -i.e.: disable variable expansion and command substitution, perform quote -removal, treat any number of consecutive delimiters as a single -delimiter, replace \fBC\fR escapes appearing in the input string with -the corresponding characters. -.TP -.B WRDSF_APPEND -Append the words found to the array resulting from a previous call to -\fBwordsplit\fR. -.TP -.B WRDSF_DOOFFS -Insert -.I ws_offs -initial -.BR NULL s -in the array -.IR ws_wordv . -These are not counted in the returned -.IR ws_wordc . -.TP -.B WRDSF_NOCMD -Don't do command substitution. -.TP -.B WRDSF_REUSE -The parameter \fIws\fR resulted from a previous call to -\fBwordsplit\fR, and \fBwordsplit_free\fR was not called. Reuse the -allocated storage. -.TP -.B WRDSF_SHOWERR -Print errors using -.BR ws_error . -.TP -.B WRDSF_UNDEF -Consider it an error if an undefined variable is expanded. -.TP -.B WRDSF_NOVAR -Don't do variable expansion. -.TP -.B WRDSF_ENOMEMABRT -Abort on -.B ENOMEM -error. By default, out of memory errors are treated as any other -errors: the error is reported using \fIws_error\fR if the -.B WRDSF_SHOWERR -flag is set, and error code is returned. If this flag is set, the -.B ws_alloc_die -function is called instead. This function is not supposed to return. -.TP -.B WRDSF_WS -Trim off any leading and trailing whitespace from the returned -words. This flag is useful if the \fIws_delim\fR member does not -contain whitespace characters. -.TP -.B WRDSF_SQUOTE -Handle single quotes. -.TP -.B WRDSF_DQUOTE -Handle double quotes. -.TP -.B WRDSF_QUOTE -A shortcut for \fB(WRDSF_SQUOTE|WRDSF_DQUOTE)\fR. -.TP -.B WRDSF_SQUEEZE_DELIMS -Replace each input sequence of repeated delimiters with a single -delimiter. -.TP -.B WRDSF_RETURN_DELIMS -Return delimiters. -.TP -.B WRDSF_SED_EXPR -Treat -.BR sed (1) -expressions as words.
-.TP -.B WRDSF_DELIM -.I ws_delim -member is initialized. -.TP -.B WRDSF_COMMENT -.I ws_comment -member is initialized. -.TP -.B WRDSF_ALLOC_DIE -.I ws_alloc_die -member is initialized. -.TP -.B WRDSF_ERROR -.I ws_error -member is initialized. -.TP -.B WRDSF_DEBUG -.I ws_debug -member is initialized. -.TP -.B WRDSF_ENV -.I ws_env -member is initialized. -.TP -.B WRDSF_GETVAR -.I ws_getvar -member is initialized. -.TP -.B WRDSF_SHOWDBG -Enable debugging. -.TP -.B WRDSF_NOSPLIT -Don't split input into words. This flag is useful for side -effects, e.g. to perform variable expansion within a string. -.TP -.B WRDSF_KEEPUNDEF -Keep undefined variables in place, instead of expanding them to -empty strings. -.TP -.B WRDSF_WARNUNDEF -Warn about undefined variables. -.TP -.B WRDSF_CESCAPES -Handle \fBC\fR-style escapes in the input string. -.TP -.B WRDSF_CLOSURE -.I ws_closure -is set. -.TP -.B WRDSF_ENV_KV -Each two consecutive elements in the -.I ws_env -array describe a single variable: -.IR ws_env [ n ] -contains variable name, and -.IR ws_env [ "n+1" ] -contains its value. -.TP -.B WRDSF_ESCAPE -.I ws_escape -is set. -.TP -.B WRDSF_INCREMENTAL -Incremental mode. Each subsequent call to \fBwordsplit\fR with -\fBNULL\fR as its first argument parses the next word from the input. -See the section -.B INCREMENTAL MODE -for a detailed discussion. -.TP -.B WRDSF_PATHEXPAND -Perform pathname and tilde expansion. If this flag is set, the -\fIws_options\fR member must also be initialized. See the -subsection -.B "Pathname expansion" -for details. -.TP -.B WRDSF_OPTIONS -The -.I ws_options -member is initialized. -.SH OPTIONS -The -.I ws_options -member is consulted if the -.B WRDSF_OPTIONS -flag is set. It contains a bitwise \fBOR\fR of one or more of the -following options: -.TP -.B WRDSO_NULLGLOB -Remove the words that produce empty string after pathname expansion. -.TP -.B WRDSO_FAILGLOB -Output error message if pathname expansion produces empty string.
-.TP -.B WRDSO_DOTGLOB -During pathname expansion allow a leading period to be matched by -metacharacters. -.PP -.TP -.B WRDSO_BSKEEP_WORD -Quote removal: when an unrecognized escape sequence is encountered in a word, -preserve it on output. If that bit is not set, the backslash is -removed from such sequences. -.TP -.B WRDSO_OESC_WORD -Quote removal: handle octal escapes in words. -.TP -.B WRDSO_XESC_WORD -Quote removal: handle hex escapes in words. -.TP -.B WRDSO_BSKEEP_QUOTE -Quote removal: when an unrecognized escape sequence is encountered in -a doubly-quoted string, preserve it on output. If that bit is not -set, the backslash is removed from such sequences. -.TP -.B WRDSO_OESC_QUOTE -Quote removal: handle octal escapes in doubly-quoted strings. -.TP -.B WRDSO_XESC_QUOTE -Quote removal: handle hex escapes in doubly-quoted strings. -.TP -.B WRDSO_MAXWORDS -The \fBws_maxwords\fR member is initialized. This is used to control -the number of words returned by a call to \fBwordsplit\fR. For a -detailed discussion, refer to the chapter -.BR "LIMITING THE NUMBER OF WORDS" . -.SH "ERROR CODES" -.TP -.BR WRDSE_OK ", " WRDSE_EOF -Successful return. -.TP -.B WRDSE_QUOTE -Missing closing quote. The \fIws_endp\fR points to the position in -the input string where the error occurred. -.TP -.B WRDSE_NOSPACE -Memory exhausted. -.TP -.B WRDSE_USAGE -Invalid wordsplit usage. -.TP -.B WRDSE_CBRACE -Unbalanced curly brace. -.TP -.B WRDSE_UNDEF -Undefined variable. This error is returned only if the -\fBWRDSF_UNDEF\fR flag is set. -.TP -.B WRDSE_NOINPUT -Input exhausted. This is not actually an error. This code is returned -if \fBwordsplit\fR (or \fBwordsplit_len\fR) is invoked in incremental -mode and encounters end of input string. See the section -.BR "INCREMENTAL MODE" . -.TP -.B WRDSE_PAREN -Unbalanced parenthesis. -.TP -.B WRDSE_GLOBERR -An error occurred during pattern matching. -.TP -.B WRDSE_USERERR -User-defined error.
Normally this error is returned by \fBws_getvar\fR or -\fBws_command\fR. Use the function -.B wordsplit_strerror -to get textual description of the error. -.SH "RETURN VALUE" -Both -.B wordsplit -and -.B wordsplit_len -return \fB0\fR on success, and a non-zero error code on -error (see the section -.BR "ERROR CODES" ). -.PP -.B wordsplit_strerror -returns a pointer to the constant string describing the last error -condition that occurred in -.IR ws . -.SH EXAMPLE -The short program below implements a function that parses the -input string similarly to the shell. All expansions are performed. -Default error reporting is used. -.PP -.EX -#include <stdlib.h> -#include <stdio.h> -#include <errno.h> -#include <string.h> -#include <wordsplit.h> - -/* Run command from \fIstr\fR (\fIlen\fR bytes long) and store its - output in \fIret\fR. - \fIargv\fR and \fIclosure\fR are not used. - Return wordsplit error code. - */ -static int runcmd(char **ret, const char *str, size_t len, - char **argv, void *closure) -{ - FILE *fp; - char *cmd; - int c, lastc; - char *buffer = NULL; - size_t bufsize = 0; - size_t buflen = 0; - - /* Convert to a null-terminated string for \fBpopen\fR(3) */ - cmd = malloc(len + 1); - if (!cmd) - return WRDSE_NOSPACE; - memcpy(cmd, str, len); - cmd[len] = 0; - - fp = popen(cmd, "r"); - if (!fp) { - char buf[128]; - - snprintf(buf, sizeof buf, "can't run %s: %s", - cmd, strerror(errno)); - *ret = strdup(buf); - if (!*ret) - return WRDSE_NOSPACE; - else - return WRDSE_USERERR; - } - - /* Collect the output, reallocating \fIbuffer\fR as needed. 
*/ - while ((c = fgetc(fp)) != EOF) { - lastc = c; - if (c == '\n') - c = ' '; - if (buflen == bufsize) { - char *p; - - if (bufsize == 0) - bufsize = 80; - else - bufsize *= 2; - p = realloc(buffer, bufsize); - if (!p) { - free(buffer); - free(cmd); - return WRDSE_NOSPACE; - } - buffer = p; - } - buffer[buflen++] = c; - } - - /* Trim off the trailing newline */ - if (buffer) { - if (lastc == '\n') - --buflen; - buffer[buflen] = 0; - } - - pclose(fp); - free(cmd); - - /* Return the composed string. */ - *ret = buffer; - return WRDSE_OK; -} - -extern char **environ; - -/* Parse \fIs\fR much as shell does. Return array of words on - success, and NULL on error. - */ -char **shell_parse(char *s) -{ - wordsplit_t ws; - size_t wc; - char **wv; - int rc; - - /* Initialize \fIws\fR */ - ws.ws_env = (const char **) environ; - ws.ws_command = runcmd; - /* Call \fBwordsplit\fR. Let it report errors, if any. */ - rc = wordsplit(s, &ws, - WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_PATHEXPAND - | WRDSF_SHOWERR); - if (rc == WRDSE_OK) - /* Store away the resulting words on success. */ - wordsplit_getwords(&ws, &wc, &wv); - else - wv = NULL; - wordsplit_free(&ws); - return wv; -} -.EE -.SH AUTHORS -Sergey Poznyakoff -.SH "BUG REPORTS" -Report bugs to <gray+grecs@gnu.org.ua>. -.SH COPYRIGHT -Copyright \(co 2009-2018 Sergey Poznyakoff -.br -.na -License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> -.br -.ad -This is free software: you are free to change and redistribute it. -There is NO WARRANTY, to the extent permitted by law. -.\" Local variables: -.\" eval: (add-hook 'write-file-hooks 'time-stamp) -.\" time-stamp-start: ".TH [A-Z_][A-Z0-9_]* [0-9] \"" -.\" time-stamp-format: "%:B %:d, %:y" -.\" time-stamp-end: "\"" -.\" time-stamp-line-limit: 20 -.\" end: - |