aboutsummaryrefslogtreecommitdiff
path: root/doc/wordsplit.3
diff options
context:
space:
mode:
Diffstat (limited to 'doc/wordsplit.3')
-rw-r--r--doc/wordsplit.31036
1 files changed, 0 insertions, 1036 deletions
diff --git a/doc/wordsplit.3 b/doc/wordsplit.3
deleted file mode 100644
index 1a6f80b..0000000
--- a/doc/wordsplit.3
+++ /dev/null
@@ -1,1036 +0,0 @@
-.\" This file is part of grecs -*- nroff -*-
-.\" Copyright (C) 2007-2018 Sergey Poznyakoff
-.\"
-.\" Grecs is free software; you can redistribute it and/or modify
-.\" it under the terms of the GNU General Public License as published by
-.\" the Free Software Foundation; either version 3, or (at your option)
-.\" any later version.
-.\"
-.\" Grecs is distributed in the hope that it will be useful,
-.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
-.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-.\" GNU General Public License for more details.
-.\"
-.\" You should have received a copy of the GNU General Public License
-.\" along with Grecs. If not, see <http://www.gnu.org/licenses/>.
-.\"
-.TH WORDSPLIT 3 "May 22, 2018" "GRECS" "Grecs User Reference"
-.SH NAME
-wordsplit \- split string into words
-.SH SYNOPSIS
-.B #include <wordsplit.h>
-.sp
-\fBint wordsplit (const char *\fIs\fB,\
- wordsplit_t *\fIws\fB, int \fIflags\fB);\fR
-.sp
-\fBint wordsplit_len (const char *\fIs\fB,\
- \fBsize_t \fIlen\fR,\
- \fBwordsplit_t *\fIp\fB,\
- int \fIflags\fB);
-.sp
-\fBvoid wordsplit_free (wordsplit_t *\fIp\fB);\fR
-.sp
-\fBvoid wordsplit_free_words (wordsplit_t *\fIws\fB);\fR
-.sp
-\fBvoid wordsplit_getwords (wordsplit_t *\fIws\fB,\
- int *\fIwordc\fB, char ***\fIwordv\fB);
-.sp
-\fBvoid wordsplit_perror (wordsplit_t *\fIws\fB);\fR
-.sp
-\fBconst char *wordsplit_strerror (wordsplit_t *\fIws\fB);\fR
-.sp
-\fBvoid wordsplit_clearerr (wordsplit_t *\fIws\fB);\fR
-.SH DESCRIPTION
-The function \fBwordsplit\fR splits the string \fIs\fR into words
-using a set of rules governed by \fIflags\fR. Depending on
-\fIflags\fR, the function performs the following operations:
-whitespace trimming, tilde expansion, variable expansion, quote
-removal, command substitution, and path expansion. On success,
-\fBwordsplit\fR returns 0 and stores the words found in the member
-\fBws_wordv\fR and the number of words in the member \fBws_wordc\fR.
-On error, a non-zero error code is returned.
-.PP
-The function \fBwordsplit_len\fR acts similarly, except that it
-accesses only the first \fIlen\fR bytes of the string \fIs\fR, which is
-not required to be null-terminated.
-.PP
-When no longer needed, the resources allocated by a call to one of
-these functions must be freed using
-.BR wordsplit_free .
-.PP
-The function
-.B wordsplit_free_words
-frees only the memory allocated for elements of
-.I ws_wordv
-and initializes
-.I ws_wordc
-to zero.
-.PP
-The usual calling sequence is:
-.PP
-.EX
-wordsplit_t ws;
-int rc;
-
-if (wordsplit(s, &ws, WRDSF_DEFFLAGS)) {
- wordsplit_perror(&ws);
- return;
-}
-for (i = 0; i < ws.ws_wordc; i++) {
- /* do something with ws.ws_wordv[i] */
-}
-wordsplit_free(&ws);
-.EE
-.PP
-The function
-.B wordsplit_getwords
-returns in \fIwordv\fR an array of words, and in \fIwordc\fR the number
-of elements in \fIwordv\fR. The array can be used after calling
-.BR wordsplit_free .
-The caller becomes responsible for freeing the memory allocated for
-each element of the array and the array pointer itself.
-.PP
-The function
-.B wordsplit_perror
-prints error message from the last invocation of \fBwordsplit\fR. It
-uses the function pointed to by the
-.I ws_error
-member. By default, it outputs the message on the standard error.
-.PP
-For more sophisticated error reporting, the function
-.B wordsplit_strerror
-can be used. It returns a pointer to the string describing the error.
-The caller should treat this pointer as a constant string. It should
-not try to alter or deallocate it.
-.PP
-The function
-.B wordsplit_clearerr
-clears the error condition associated with \fIws\fR.
-.SH INCREMENTAL MODE
-In incremental mode \fBwordsplit\fR parses one word per invocation.
-It returns \fBWRDSE_OK\fR on success and \fBWRDSE_NOINPUT\fR when
-the entire input string has been processed.
-.PP
-This mode is enabled if the flag \fBWRDSF_INCREMENTAL\fR is set in
-the \fIflags\fR argument. Subsequent calls to \fBwordsplit\fR must
-have \fBNULL\fR as first argument. Each successful
-call will return exactly one word in \fBws.ws_wordv[0]\fR.
-.PP
-An example usage:
-.PP
-.EX
-wordsplit_t ws;
-int rc;
-flags = WRDSF_DEFFLAGS|WRDSF_INCREMENTAL;
-
-for (rc = wordsplit(s, &ws, flags); rc == WRDSE_OK;
- rc = wordsplit(NULL, &ws, flags)) {
- process(ws.ws_wordv[0]);
-}
-
-if (rc != WRDSE_NOINPUT)
- wordsplit_perror(&ws);
-
-wordsplit_free(&ws);
-.EE
-.SH OPTIONS
-The number of flags is limited to 32 (the width of \fBuint32_t\fR data
-type) and each bit is occupied by a corresponding flag. However,
-\fBwordsplit\fR provides more features than these flags can
-control. Additional features can be requested by setting a corresponding
-\fIoption bit\fR in the \fBws_options\fR field of the \fBstruct
-wordsplit\fR argument. To inform wordsplit functions that this field
-is initialized, the \fBWRDSF_OPTIONS\fR flag must be set.
-.PP
-Option symbolic names begin with \fBWRDSO_\fR. They are discussed in
-detail in the subsequent chapters.
-.SH EXPANSION
-Expansion is performed on the input after it has been split into
-words. There are several kinds of expansion; which of them are
-performed is controlled by the appropriate bits set in the \fIflags\fR
-argument. Whatever expansion kinds are enabled, they are always run
-in the same order as described in this section.
-.SS Whitespace trimming
-Whitespace trimming removes any leading and trailing whitespace from
-the initial word array. It is enabled by the
-.B WRDSF_WS
-flag. Whitespace trimming is needed only if you redefine
-word delimiters (\fIws_delim\fR member) so that they don't contain
-whitespace characters (\fB\(dq \\t\\n\(dq\fR).
-.SS Tilde expansion
-Tilde expansion is enabled if the
-.B WRDSF_PATHEXPAND
-bit is set. It expands all words that begin with an unquoted tilde
-character (`\fB~\fR'). If tilde is followed immediately by a slash,
-it is replaced with the home directory of the current user (as
-determined by his \fBpasswd\fR entry). A tilde alone is handled the
-same way. Otherwise, the characters between the tilde and first slash
-character (or end of string, if it doesn't contain any) are treated as
-a login name, and are replaced (along with the tilde itself) with the
-home directory of that user. If there is no user with such login
-name, the word is left unchanged.
-.SS Variable expansion
-Variable expansion replaces each occurrence of
-.BI $ NAME
-or
-.BI ${ NAME }
-with the value of the variable \fINAME\fR. It is enabled if the
-flag \fBWRDSF_NOVAR\fR is not set. The caller is responsible for
-supplying the table of available variables. Two mechanisms are
-provided: environment array and a callback function.
-.PP
-Environment array is a \fBNULL\fR-terminated array of variables,
-stored in the \fIws_env\fR member. The \fBWRDSF_ENV\fR flag must be
-set in order to instruct \fBwordsplit\fR to use this array.
-.PP
-By default, elements of the \fIws_env\fR array have the form
-.IR NAME = VALUE .
-An alternative format is enabled by the
-.B WRDSF_ENV_KV
-flag. When it is set, each variable is described by two consecutive
-elements in the array:
-.IR ws_env [ n ]
-containing the variable name, and
-.IR ws_env [ "n+1" ]
-containing its value. If the latter is \fBNULL\fR, the corresponding
-variable is undefined.
-.PP
-More sophisticated variable tables can be implemented using
-callback function. The \fIws_getvar\fR member should be set to point
-to that function and \fBWRDSF_GETVAR\fR flag must be set. The
-function itself shall be defined as
-.PP
-.EX
-int getvar (char **ret, const char *var, size_t len, void *clos);
-.EE
-.PP
-The function shall look up the variable identified by the first
-\fIlen\fR bytes of the string \fIvar\fR. If such variable is found,
-the function shall store a copy of its value (allocated using
-\fBmalloc\fR(3)) in the memory location pointed to by \fBret\fR, and
-return \fBWRDSE_OK\fR. If the variable is not found, the function shall
-return \fBWRDSE_UNDEF\fR. Otherwise, a non-zero error code shall be
-returned.
-.PP
-If \fIws_getvar\fR returns
-.BR WRDSE_USERERR ,
-it must store the pointer to the error description string in
-.BR *ret .
-In any case (whether returning \fB0\fR or \fBWRDSE_USERERR\fR), the
-data returned in \fBret\fR must be allocated using
-.BR malloc (3).
-.PP
-If both
-.I ws_env
-and
-.I ws_getvar
-are used, the variable is first looked up in
-.IR ws_env ,
-and if not found there, the
-.I ws_getvar
-function is called.
-.PP
-During variable expansion, the forms below cause
-.B wordsplit
-to test for a variable that is unset or null. Omitting the
-colon results in a test only for a variable that is unset.
-.TP
-.BI ${ variable :- word }
-.BR "Use Default Values" .
-If \fIvariable\fR is unset or null, the expansion of \fIword\fR is substituted.
-Otherwise, the value of \fIvariable\fR is substituted.
-.TP
-.BI ${ variable := word }
-.BR "Assign Default Values" .
-If \fIvariable\fR is unset or null, the expansion of \fIword\fR is
-assigned to \fIvariable\fR. The value of \fIvariable\fR is then substituted.
-.TP
-.BI ${ variable :? word }
-.BR "Display Error if Null or Unset" .
-If \fIvariable\fR is null or unset, the expansion of \fIword\fR (or a
-message to that effect if word is not present) is output using
-.IR ws_error .
-Otherwise, the value of \fIvariable\fR is substituted.
-.TP
-.BI ${ variable :+ word }
-.BR "Use Alternate Value" .
-If \fIvariable\fR is null or unset, nothing is substituted, otherwise the
-expansion of \fIword\fR is substituted.
-.SS Quote removal
-Quote removal translates unquoted escape sequences into corresponding bytes.
-An escape sequence is a backslash followed by one or more characters. By
-default, each sequence \fB\\\fIC\fR appearing in unquoted words is
-replaced with the character \fIC\fR. In doubly-quoted strings, two
-backslash sequences are recognized: \fB\\\\\fR translates to a single
-backslash, and \fB\\\(dq\fR translates to a double-quote.
-.PP
-Two flags are provided to modify this behavior. If the
-.B WRDSF_CESCAPES
-flag is set, the following escape sequences are recognized:
-.sp
-.nf
-.ta 8n 18n 42n
-.ul
- Sequence Expansion ASCII
- \fB\\\\\fR \fB\\\fR 134
- \fB\\\(dq\fR \fB\(dq\fR 042
- \fB\\a\fR audible bell 007
- \fB\\b\fR backspace 010
- \fB\\f\fR form-feed 014
- \fB\\n\fR new line 012
- \fB\\r\fR carriage return 015
- \fB\\t\fR horizontal tabulation 011
- \fB\\v\fR vertical tabulation 013
-.fi
-.sp
-The sequence \fB\\x\fINN\fR or \fB\\X\fINN\fR, where \fINN\fR stands
-for a two-digit hex number is replaced with ASCII character \fINN\fR.
-The sequence \fB\\0\fINNN\fR, where \fINNN\fR stands for a three-digit
-octal number is replaced with ASCII character whose code is \fINNN\fR.
-.PP
-The \fBWRDSF_ESCAPE\fR flag allows the caller to customize escape
-sequences. If it is set, the \fBws_escape\fR member must be
-initialized. This member provides escape tables for unquoted words
-(\fBws_escape[0]\fR) and quoted strings (\fBws_escape[1]\fR). Each
-table is a string consisting of an even number of characters. In each
-pair of characters, the first one is a character that can appear after
-backslash, and the following one is its translation. For example, the
-above table of C escapes is represented as
-\fB\(dqa\\ab\\bf\\fn\\nr\\rt\\tv\\v\(dq\fR.
-.PP
-It is valid to initialize \fBws_escape\fR elements to zero. In this
-case, no backslash translation occurs.
-.PP
-The handling of octal and hex escapes is controlled by the following
-bits in \fBws_options\fR:
-.TP
-.B WRDSO_BSKEEP_WORD
-When an unrecognized escape sequence is encountered in a word,
-preserve it on output. If that bit is not set, the backslash is
-removed from such sequences.
-.TP
-.B WRDSO_OESC_WORD
-Handle octal escapes in words.
-.TP
-.B WRDSO_XESC_WORD
-Handle hex escapes in words.
-.TP
-.B WRDSO_BSKEEP_QUOTE
-When an unrecognized escape sequence is encountered in a doubly-quoted
-string, preserve it on output. If that bit is not set, the backslash is
-removed from such sequences.
-.TP
-.B WRDSO_OESC_QUOTE
-Handle octal escapes in doubly-quoted strings.
-.TP
-.B WRDSO_XESC_QUOTE
-Handle hex escapes in doubly-quoted strings.
-.SS Command substitution
-During \fIcommand substitution\fR, each word is scanned for commands.
-Each command found is executed and replaced by the output it creates.
-.PP
-The syntax is:
-.PP
-.RS +4
-.BI $( command )
-.RE
-.PP
-Command substitutions may be nested.
-.PP
-Unless the substitution appears within double quotes, word splitting and
-pathname expansion are performed on its result.
-.PP
-To enable command substitution, the caller must initialize the
-.I ws_command
-member with the address of the substitution function and make sure the
-.B WRDSF_NOCMD
-flag is not set.
-.PP
-The substitution function should be defined as follows:
-.PP
-.RS +4
-\fBint \fIcommand\fB\
- (char **\fIret\fB,\
- const char *\fIcmd\fB,\
- size_t \fIlen,\fB\
- char **\fIargv\fB,\
- void *\fIclos\fB);\fR
-.RE
-.PP
-First \fIlen\fR bytes of \fIcmd\fR contain the command invocation as
-it appeared between
-.BR $( and ),
-with all expansions performed.
-.PP
-The \fIargv\fR parameter contains the command
-line split into words using the same settings as the input \fIws\fR structure.
-.PP
-The \fIclos\fR parameter supplies user-specific data, passed in the
-\fIws_closure\fR member.
-.PP
-On success, the function stores a pointer to the
-output string in the memory location pointed to by \fIret\fR and
-returns \fBWRDSE_OK\fR (\fB0\fR). On error, it must return one of the
-error codes described in the section
-.BR "ERROR CODES" .
-If
-.B WRDSE_USERERR
-is returned, a pointer to the error description string must be stored in
-.BR *ret .
-.PP
-When \fBWRDSE_OK\fR or \fBWRDSE_USERERR\fR is returned, the
-data stored in \fB*ret\fR must be allocated using
-.BR malloc (3).
-.SS Pathname expansion
-Pathname expansion is performed if the \fBWRDSF_PATHEXPAND\fR flag is
-set. Each unquoted word is scanned for characters
-.BR * , ? ", and " [ .
-If one of these appears, the word is considered a \fIpattern\fR (in
-the sense of
-.BR glob (3))
-and is replaced with an alphabetically sorted list of file names matching the
-pattern.
-.PP
-If no matches are found for a word
-and the \fIws_options\fR member has the
-.B WRDSO_NULLGLOB
-bit set, the word is removed.
-.PP
-If the \fBWRDSO_FAILGLOB\fR option is set, an error message is output
-for each such word using
-.IR ws_error .
-.PP
-When matching a pattern, the dot at the start of a name or immediately
-following a slash must be matched explicitly, unless
-the \fBWRDSO_DOTGLOB\fR option is set.
-.SH LIMITING THE NUMBER OF WORDS
-The maximum number of words to be returned can be limited by setting
-the \fBws_maxwords\fR member to the desired count, and setting the
-\fBWRDSO_MAXWORDS\fR option, e.g.:
-.sp
-.EX
-struct wordsplit ws;
-ws.ws_maxwords = 3;
-ws.ws_options = WRDSO_MAXWORDS;
-wordsplit(str, &ws, WRDSF_DEFFLAGS|WRDSF_OPTIONS);
-.EE
-.PP
-If the actual number of words in the expanded input is greater than
-the supplied limit, the trailing part of the input will be returned in
-the last word. For example, if the input to the above fragment were
-\fBNow is the time for all good men\fR, then the returned words would be:
-.sp
-.EX
-"Now"
-"is"
-"the time for all good men"
-.EE
-.SH WORDSPLIT_T STRUCTURE
-The data type \fBwordsplit_t\fR has several members that contain
-output data upon return from \fBwordsplit\fR or \fBwordsplit_len\fR,
-and a number of members that the caller can initialize on input in
-order to customize the function behavior. Each such member has a
-corresponding flag bit, which must be set in the \fIflags\fR argument
-in order to instruct the \fBwordsplit\fR function to use it.
-.SS OUTPUT
-.TP
-.BI size_t " ws_wordc"
-Number of words in \fIws_wordv\fR. Accessible upon successful return
-from \fBwordsplit\fR.
-.TP
-.BI "char ** " ws_wordv
-Array of resulting words. Accessible upon successful return
-from \fBwordsplit\fR.
-.TP
-.BI "size_t " ws_wordi
-Total number of words processed. This field is intended for use with
-.B WRDSF_INCREMENTAL
-flag. If that flag is not set, the following relation holds:
-.BR "ws_wordi == ws_wordc - ws_offs" .
-.TP
-.BI "int " ws_errno
-Error code, if the invocation of \fBwordsplit\fR or
-\fBwordsplit_len\fR failed. This is the same value as returned from
-the function in that case.
-.PP
-The caller should not attempt to free or reallocate \fIws_wordv\fR or
-any elements thereof, nor to modify \fIws_wordc\fR.
-.PP
-To store away the words for use after freeing \fIws\fR with
-.BR wordsplit_free ,
-the caller should use
-.BR wordsplit_getwords .
-It is more effective than copying the contents of
-.I ws_wordv
-manually.
-.SS INPUT
-.TP
-.BI "size_t " ws_offs
-If the
-.B WRDSF_DOOFFS
-flag is set, this member specifies the number of initial elements in
-.I ws_wordv
-to fill with NULLs. These elements are not counted in the returned
-.IR ws_wordc .
-.TP
-.BI "size_t " ws_maxwords
-Maximum number of words to return. For this field to take effect, the
-\fBWRDSO_MAXWORDS\fR option and \fBWRDSF_OPTIONS\fR flag must be set.
-For a detailed discussion, see the chapter
-.BR "LIMITING THE NUMBER OF WORDS" .
-.TP
-.BI "int " ws_flags
-Contains flags passed to wordsplit on entry. Can be used as a
-read-only member when using \fBwordsplit\fR in incremental mode or
-in a loop with
-.B WRDSF_REUSE
-flag set.
-.TP
-.BI "int " ws_options
-Additional options used when
-.B WRDSF_OPTIONS
-is set.
-.TP
-.BI "const char *" ws_delim
-Word delimiters. If initialized on input, the
-.B WRDSF_DELIM
-flag must be set. Otherwise, it is initialized on entry to
-.B wordsplit
-with the string \fB\(dq \\t\\n\(dq\fR.
-.TP
-.BI "const char *" ws_comment
-A zero-terminated string of characters that begin an inline comment.
-If initialized on input, the
-.B WRDSF_COMMENT
-flag must be set. By default, its value is \fB\(dq#\(dq\fR.
-.TP
-.BI "const char *" ws_escape [2]
-Escape tables for unquoted words (\fBws_escape[0]\fR) and quoted
-strings (\fBws_escape[1]\fR). These are used to translate escape
-sequences (\fB\\\fIC\fR) into characters. Each table is a string
-consisting of an even number of characters. In each pair of characters,
-the first one is a character that can appear after backslash, and the
-following one is its representation. For example, the string
-\fB\(dqt\\tn\\n\(dq\fR translates \fB\\t\fR into horizontal
-tabulation character and \fB\\n\fR into newline.
-.B WRDSF_ESCAPE
-flag must be set if this member is initialized.
-.TP
-.BI "void (*" ws_alloc_die ") (wordsplit_t *)"
-This function is called when
-.B wordsplit
-is unable to allocate memory and the
-.B WRDSF_ENOMEMABRT
-flag was set. The default function prints a
-message on standard error and aborts. This member can be used
-to customize error handling. If initialized, the
-.B WRDSF_ALLOC_DIE
-flag must be set.
-.TP
-.BI "void (*" ws_error ") (const char *, ...)"
-Pointer to function used for error reporting. The invocation
-convention is the same as for
-.BR printf (3).
-The default function formats and prints the message on the standard
-error.
-
-If this member is initialized, the
-.B WRDSF_ERROR
-flag must be set.
-.TP
-.BI "void (*" ws_debug ") (const char *, ...)"
-Pointer to function used for debugging output. By default it points
-to the same function as
-.BR ws_error .
-If initialized, the
-.B WRDSF_DEBUG
-flag must be set.
-.TP
-.BI "const char **" ws_env
-A \fBNULL\fR-terminated array of environment variables. It is used
-during variable expansion. If set, the
-.B WRDSF_ENV
-flag must be set. Variable expansion is enabled only if either
-.B WRDSF_ENV
-or
-.B WRDSF_GETVAR
-(see below) is set, and
-.B WRDSF_NOVAR
-flag is not set.
-
-Each element of
-.I ws_env
-must have the form \fB\(dq\fINAME\fB=\fIVALUE\fB\(dq\fR, where \fINAME\fR is
-the name of the variable, and \fIVALUE\fR is its value.
-Alternatively, if the \fBWRDSF_ENV_KV\fR flag is set, each variable is
-described by two elements of
-.IR ws_env :
-one containing variable name, and the next one with its
-value.
-.TP
-.BI "int (*" ws_getvar ") (char **ret, const char *var, size_t len, void *clos)"
-Points to the function that will be used during variable expansion to
-look up the value of the environment variable named \fBvar\fR.
-This function is used if the variable expansion is enabled (i.e. the
-.B WRDSF_NOVAR
-flag is not set), and the \fBWRDSF_GETVAR\fR flag is set.
-
-If both
-.B WRDSF_ENV
-and
-.B WRDSF_GETVAR
-are set, the variable is first looked up in the
-.I ws_env
-array and, if not found there,
-.I ws_getvar
-is called.
-
-The name of the variable is specified by the first \fIlen\fR bytes of
-the string \fIvar\fR. The \fIclos\fR parameter supplies the
-user-specific data (see below the description of \fIws_closure\fR
-member) and the \fBret\fR parameter points to the memory location
-where output data is to be stored. On success, the function must
-store there a pointer to the string with the value of the variable and
-return 0. On error, it must return one of the error codes described
-in the section
-.BR "ERROR CODES" .
-If \fIws_getvar\fR returns
-.BR WRDSE_USERERR ,
-it must store the pointer to the error description string in
-.BR *ret .
-In any case (whether returning \fB0\fR or \fBWRDSE_USERERR\fR), the
-data returned in \fBret\fR must be allocated using
-.BR malloc (3).
-.TP
-.BI "void *" ws_closure
-Additional user-specific data passed as the last argument to
-.I ws_getvar
-or
-.I ws_command
-(see below). If defined, the
-.B WRDSF_CLOSURE
-flag must be set.
-.TP
-\fBint (*\fIws_command\fB)\
- (char **ret,\
- const char *cmd,\
- size_t len,\
- char **argv,\
- void *clos)\fR
-Pointer to the function that performs command substitution. It treats
-the first \fIlen\fR bytes of the string \fIcmd\fR as a command
-(whatever it means for the caller) and attempts to execute it. On
-success, a pointer to the string with the command output is stored
-in the memory location pointed to by \fBret\fR and \fB0\fR is
-returned. On error,
-the function must return one of the error codes described in the section
-.BR "ERROR CODES" .
-If \fIws_command\fR returns
-.BR WRDSE_USERERR ,
-it must store the pointer to the error description string in
-.BR *ret .
-In any case (whether returning \fB0\fR or \fBWRDSE_USERERR\fR), the
-data returned in \fBret\fR must be allocated using
-.BR malloc (3).
-
-The parameter \fBargv\fR contains the command split into
-words using the same settings as the input \fIws\fR structure, with
-command substitution disabled.
-
-The \fIclos\fR parameter supplies user-specific data (see the
-description of \fIws_closure\fR member).
-.SH FLAGS
-The following macros are defined for use in the \fBflags\fR argument.
-.TP
-.B WRDSF_DEFFLAGS
-Default flags. This is a shortcut for:
-
-\fB(WRDSF_NOVAR |\
- WRDSF_NOCMD |\
- WRDSF_QUOTE |\
- WRDSF_SQUEEZE_DELIMS |\
- WRDSF_CESCAPES)\fR,
-
-i.e.: disable variable expansion and command substitution, perform quote
-removal, treat any number of consecutive delimiters as a single
-delimiter, replace \fBC\fR escapes appearing in the input string with
-the corresponding characters.
-.TP
-.B WRDSF_APPEND
-Append the words found to the array resulting from a previous call to
-\fBwordsplit\fR.
-.TP
-.B WRDSF_DOOFFS
-Insert
-.I ws_offs
-initial
-.BR NULL s
-in the array
-.IR ws_wordv .
-These are not counted in the returned
-.IR ws_wordc .
-.TP
-.B WRDSF_NOCMD
-Don't do command substitution.
-.TP
-.B WRDSF_REUSE
-The parameter \fIws\fR resulted from a previous call to
-\fBwordsplit\fR, and \fBwordsplit_free\fR was not called. Reuse the
-allocated storage.
-.TP
-.B WRDSF_SHOWERR
-Print errors using
-.BR ws_error .
-.TP
-.B WRDSF_UNDEF
-Consider it an error if an undefined variable is expanded.
-.TP
-.B WRDSF_NOVAR
-Don't do variable expansion.
-.TP
-.B WRDSF_ENOMEMABRT
-Abort on
-.B ENOMEM
-error. By default, out of memory errors are treated as any other
-errors: the error is reported using \fIws_error\fR if the
-.B WRDSF_SHOWERR
-flag is set, and error code is returned. If this flag is set, the
-.B ws_alloc_die
-function is called instead. This function is not supposed to return.
-.TP
-.B WRDSF_WS
-Trim off any leading and trailing whitespace from the returned
-words. This flag is useful if the \fIws_delim\fR member does not
-contain whitespace characters.
-.TP
-.B WRDSF_SQUOTE
-Handle single quotes.
-.TP
-.B WRDSF_DQUOTE
-Handle double quotes.
-.TP
-.B WRDSF_QUOTE
-A shortcut for \fB(WRDSF_SQUOTE|WRDSF_DQUOTE)\fR.
-.TP
-.B WRDSF_SQUEEZE_DELIMS
-Replace each input sequence of repeated delimiters with a single
-delimiter.
-.TP
-.B WRDSF_RETURN_DELIMS
-Return delimiters.
-.TP
-.B WRDSF_SED_EXPR
-Treat
-.BR sed (1)
-expressions as words.
-.TP
-.B WRDSF_DELIM
-.I ws_delim
-member is initialized.
-.TP
-.B WRDSF_COMMENT
-.I ws_comment
-member is initialized.
-.TP
-.B WRDSF_ALLOC_DIE
-.I ws_alloc_die
-member is initialized.
-.TP
-.B WRDSF_ERROR
-.I ws_error
-member is initialized.
-.TP
-.B WRDSF_DEBUG
-.I ws_debug
-member is initialized.
-.TP
-.B WRDSF_ENV
-.I ws_env
-member is initialized.
-.TP
-.B WRDSF_GETVAR
-.I ws_getvar
-member is initialized.
-.TP
-.B WRDSF_SHOWDBG
-Enable debugging.
-.TP
-.B WRDSF_NOSPLIT
-Don't split input into words. This flag is useful for side
-effects, e.g. to perform variable expansion within a string.
-.TP
-.B WRDSF_KEEPUNDEF
-Keep undefined variables in place, instead of expanding them to
-empty strings.
-.TP
-.B WRDSF_WARNUNDEF
-Warn about undefined variables.
-.TP
-.B WRDSF_CESCAPES
-Handle \fBC\fR-style escapes in the input string.
-.TP
-.B WRDSF_CLOSURE
-.I ws_closure
-is set.
-.TP
-.B WRDSF_ENV_KV
-Each two consecutive elements in the
-.I ws_env
-array describe a single variable:
-.IR ws_env [ n ]
-contains variable name, and
-.IR ws_env [ "n+1" ]
-contains its value.
-.TP
-.B WRDSF_ESCAPE
-.I ws_escape
-is set.
-.TP
-.B WRDSF_INCREMENTAL
-Incremental mode. Each subsequent call to \fBwordsplit\fR with
-\fBNULL\fR as its first argument parses the next word from the input.
-See the section
-.B INCREMENTAL MODE
-for a detailed discussion.
-.TP
-.B WRDSF_PATHEXPAND
-Perform pathname and tilde expansion. If this flag is set, the
-\fIws_options\fR member must also be initialized. See the
-subsection
-.B "Pathname expansion"
-for details.
-.TP
-.B WRDSF_OPTIONS
-The
-.I ws_options
-member is initialized.
-.SH OPTIONS
-The
-.I ws_options
-member is consulted if the
-.B WRDSF_OPTIONS
-flag is set. It contains a bitwise \fBOR\fR of one or more of the
-following options:
-.TP
-.B WRDSO_NULLGLOB
-Remove the words that produce empty string after pathname expansion.
-.TP
-.B WRDSO_FAILGLOB
-Output error message if pathname expansion produces empty string.
-.TP
-.B WRDSO_DOTGLOB
-During pathname expansion allow a leading period to be matched by
-metacharacters.
-.PP
-.TP
-.B WRDSO_BSKEEP_WORD
-Quote removal: when an unrecognized escape sequence is encountered in a word,
-preserve it on output. If that bit is not set, the backslash is
-removed from such sequences.
-.TP
-.B WRDSO_OESC_WORD
-Quote removal: handle octal escapes in words.
-.TP
-.B WRDSO_XESC_WORD
-Quote removal: handle hex escapes in words.
-.TP
-.B WRDSO_BSKEEP_QUOTE
-Quote removal: when an unrecognized escape sequence is encountered in
-a doubly-quoted string, preserve it on output. If that bit is not
-set, the backslash is removed from such sequences.
-.TP
-.B WRDSO_OESC_QUOTE
-Quote removal: handle octal escapes in doubly-quoted strings.
-.TP
-.B WRDSO_XESC_QUOTE
-Quote removal: handle hex escapes in doubly-quoted strings.
-.TP
-.B WRDSO_MAXWORDS
-The \fBws_maxwords\fR member is initialized. This is used to control
-the number of words returned by a call to \fBwordsplit\fR. For a
-detailed discussion, refer to the chapter
-.BR "LIMITING THE NUMBER OF WORDS" .
-.SH "ERROR CODES"
-.TP
-.BR WRDSE_OK ", " WRDSE_EOF
-Successful return.
-.TP
-.B WRDSE_QUOTE
-Missing closing quote. The \fIws_endp\fR points to the position in
-the input string where the error occurred.
-.TP
-.B WRDSE_NOSPACE
-Memory exhausted.
-.TP
-.B WRDSE_USAGE
-Invalid wordsplit usage.
-.TP
-.B WRDSE_CBRACE
-Unbalanced curly brace.
-.TP
-.B WRDSE_UNDEF
-Undefined variable. This error is returned only if the
-\fBWRDSF_UNDEF\fR flag is set.
-.TP
-.B WRDSE_NOINPUT
-Input exhausted. This is not actually an error. This code is returned
-if \fBwordsplit\fR (or \fBwordsplit_len\fR) is invoked in incremental
-mode and encounters end of input string. See the section
-.BR "INCREMENTAL MODE" .
-.TP
-.B WRDSE_PAREN
-Unbalanced parenthesis.
-.TP
-.B WRDSE_GLOBERR
-An error occurred during pattern matching.
-.TP
-.B WRDSE_USERERR
-User-defined error. Normally this error is returned by \fBws_getvar\fR or
-\fBws_command\fR. Use the function
-.B wordsplit_strerror
-to get textual description of the error.
-.SH "RETURN VALUE"
-Both
-.B wordsplit
-and
-.B wordsplit_len
-return \fB0\fR on success, and a non-zero error code on
-error (see the section
-.BR "ERROR CODES" ).
-.PP
-.B wordsplit_strerror
-returns a pointer to the constant string describing the last error
-condition that occurred in
-.IR ws .
-.SH EXAMPLE
-The short program below implements a function that parses the
-input string similarly to the shell. All expansions are performed.
-Default error reporting is used.
-.PP
-.EX
-#include <stdlib.h>
-#include <stdio.h>
-#include <errno.h>
-#include <string.h>
-#include <wordsplit.h>
-
-/* Run command from \fIstr\fR (\fIlen\fR bytes long) and store its
- output in \fIret\fR.
- \fIargv\fR and \fIclosure\fR are not used.
- Return wordsplit error code.
- */
-static int runcmd(char **ret, const char *str, size_t len,
- char **argv, void *closure)
-{
- FILE *fp;
- char *cmd;
- int c, lastc;
- char *buffer = NULL;
- size_t bufsize = 0;
- size_t buflen = 0;
-
- /* Convert to a null-terminated string for \fBpopen\fR(3) */
- cmd = malloc(len + 1);
- if (!cmd)
- return WRDSE_NOSPACE;
- memcpy(cmd, str, len);
- cmd[len] = 0;
-
- fp = popen(cmd, "r");
- if (!fp) {
- char buf[128];
-
- snprintf(buf, sizeof buf, "can't run %s: %s",
- cmd, strerror(errno));
- *ret = strdup(buf);
- if (!*ret)
- return WRDSE_NOSPACE;
- else
- return WRDSE_USERERR;
- }
-
- /* Collect the output, reallocating \fIbuffer\fR as needed. */
- while ((c = fgetc(fp)) != EOF) {
- lastc = c;
- if (c == '\n')
- c = ' ';
- if (buflen == bufsize) {
- char *p;
-
- if (bufsize == 0)
- bufsize = 80;
- else
- bufsize *= 2;
- p = realloc(buffer, bufsize);
- if (!p) {
- free(buffer);
- free(cmd);
- return WRDSE_NOSPACE;
- }
- buffer = p;
- }
- buffer[buflen++] = c;
- }
-
- /* Trim off the trailing newline */
- if (buffer) {
- if (lastc == '\n')
- --buflen;
- buffer[buflen] = 0;
- }
-
- pclose(fp);
- free(cmd);
-
- /* Return the composed string. */
- *ret = buffer;
- return WRDSE_OK;
-}
-
-extern char **environ;
-
-/* Parse \fIs\fR much as shell does. Return array of words on
- success, and NULL on error.
- */
-char **shell_parse(char *s)
-{
- wordsplit_t ws;
- size_t wc;
- char **wv;
- int rc;
-
- /* Initialize \fIws\fR */
- ws.ws_env = (const char **) environ;
- ws.ws_command = runcmd;
- /* Call \fBwordsplit\fR. Let it report errors, if any. */
- rc = wordsplit(s, &ws,
- WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_PATHEXPAND
- | WRDSF_SHOWERR);
- if (rc == WRDSE_OK)
- /* Store away the resulting words on success. */
- wordsplit_getwords(&ws, &wc, &wv);
- else
- wv = NULL;
- wordsplit_free(&ws);
- return wv;
-}
-.EE
-.SH AUTHORS
-Sergey Poznyakoff
-.SH "BUG REPORTS"
-Report bugs to <gray+grecs@gnu.org.ua>.
-.SH COPYRIGHT
-Copyright \(co 2009-2018 Sergey Poznyakoff
-.br
-.na
-License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
-.br
-.ad
-This is free software: you are free to change and redistribute it.
-There is NO WARRANTY, to the extent permitted by law.
-.\" Local variables:
-.\" eval: (add-hook 'write-file-hooks 'time-stamp)
-.\" time-stamp-start: ".TH [A-Z_][A-Z0-9_]* [0-9] \""
-.\" time-stamp-format: "%:B %:d, %:y"
-.\" time-stamp-end: "\""
-.\" time-stamp-line-limit: 20
-.\" end:
-

Return to:

Send suggestions and report system problems to the System administrator.