diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2014-10-30 16:58:00 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2014-10-30 16:58:00 +0200 |
commit | b809ebb1e2aec3f766bf35b226cd86ce2888cbe8 (patch) | |
tree | cde9cef4761263859354deba929d038ffd920047 | |
parent | c28a77d5990eba3734d6d85002a52cf4c332124a (diff) | |
download | grecs-b809ebb1e2aec3f766bf35b226cd86ce2888cbe8.tar.gz grecs-b809ebb1e2aec3f766bf35b226cd86ce2888cbe8.tar.bz2 |
Finish wordsplit docs, improve tests
-rw-r--r-- | doc/wordsplit.3 | 298 | ||||
-rw-r--r-- | src/wordsplit.c | 138 | ||||
-rw-r--r-- | src/wordsplit.h | 36 | ||||
-rw-r--r-- | tests/wordsplit.at | 19 | ||||
-rw-r--r-- | tests/wsp.c | 61 |
5 files changed, 467 insertions, 85 deletions
diff --git a/doc/wordsplit.3 b/doc/wordsplit.3 index 2f0cced..123bfb7 100644 --- a/doc/wordsplit.3 +++ b/doc/wordsplit.3 @@ -14,7 +14,7 @@ .\" You should have received a copy of the GNU General Public License .\" along with Grecs. If not, see <http://www.gnu.org/licenses/>. .\" -.TH WORDSPLIT 3 "October 28, 2014" "GRECS" "Grecs User Reference" +.TH WORDSPLIT 3 "October 30, 2014" "GRECS" "Grecs User Reference" .SH NAME wordsplit \- split string into words .SH SYNOPSIS @@ -39,8 +39,7 @@ wordsplit \- split string into words \fBvoid wordsplit_clearerr (wordsplit_t *\fIws\fB);\fR .SH DESCRIPTION The function \fBwordsplit\fR splits the string \fIs\fR into words -using a set of rules governed by \fIflags\fR and stores the result -in the memory location pointed to by \fIws\fR. Depending on +using a set of rules governed by \fIflags\fR. Depending on \fIflags\fR, the function performs the following: whitespace trimming, tilde expansion, variable expansion, quote removal, command substitution, and path expansion. On success, the function returns 0 @@ -96,11 +95,39 @@ not try to alter or deallocate it. The function .B wordsplit_clearerr clears the error condition associated with \fIws\fR. +.SH INCREMENTAL MODE +In incremental mode \fBwordsplit\fR parses one word per invocation. +It returns \fBWRDSE_OK\fR on success and \fBWRDSE_NOINPUT\fR when it +has processed the entire input string. +.PP +This mode is enabled if the flag \fBWRDSF_INCREMENTAL\fR is set in +the \fIflags\fR argument. Subsequent calls to \fBwordsplit\fR must +have \fBNULL\fR as the first argument. Each successful +call will return exactly one word in \fBws.ws_wordv[0]\fR. 
+.PP +An example usage: +.PP +.EX +wordsplit_t ws; +int rc; +int flags = WRDSF_DEFFLAGS|WRDSF_INCREMENTAL; + +for (rc = wordsplit(s, &ws, flags); rc == WRDSE_OK; + rc = wordsplit(NULL, &ws, flags)) { + process(ws.ws_wordv[0]); +} + +if (rc != WRDSE_NOINPUT) + wordsplit_perror(&ws); + +wordsplit_free(&ws); +.EE .SH EXPANSION -The number of expansions performed on the input is controlled by -appropriate bits set in the \fIflags\fR argument. Whatever expansions -are enabled, they are always run in the same order as described in this -section. +Expansion is performed on the input after it has been split into +words. There are several kinds of expansion; which of them are +performed is controlled by appropriate bits set in the \fIflags\fR +argument. Whatever expansion kinds are enabled, they are always run +in the same order as described in this section. .SS Whitespace trimming Whitespace trimming removes any leading and trailing whitespace from the initial word array. It is enabled by the @@ -206,8 +233,153 @@ Otherwise, the value of \fIvariable\fR is substituted. If \fIvariable\fR is null or unset, nothing is substituted, otherwise the expansion of \fIword\fR is substituted. .SS Quote removal +Quote removal translates unquoted escape sequences into corresponding bytes. +An escape sequence is a backslash followed by one or more characters. By +default, each sequence \fB\\\fIC\fR appearing in unquoted words is +replaced with the character \fIC\fR. In doubly-quoted strings, two +backslash sequences are recognized: \fB\\\\\fR translates to a single +backslash, and \fB\\\(dq\fR translates to a double-quote. +.PP +Two flags are provided to modify this behavior. 
If the +.I WRDSF_CESCAPES +flag is set, the following escape sequences are recognized: +.sp +.nf +.ta 8n 18n 42n +.ul + Sequence Expansion ASCII + \fB\\\\\fR \fB\\\fR 134 + \fB\\\(dq\fR \fB\(dq\fR 042 + \fB\\a\fR audible bell 007 + \fB\\b\fR backspace 010 + \fB\\f\fR form-feed 014 + \fB\\n\fR new line 012 + \fB\\r\fR carriage return 015 + \fB\\t\fR horizontal tabulation 011 + \fB\\v\fR vertical tabulation 013 +.fi +.sp +The sequence \fB\\x\fINN\fR or \fB\\X\fINN\fR, where \fINN\fR stands +for a two-digit hex number is replaced with ASCII character \fINN\fR. +The sequence \fB\\0\fINNN\fR, where \fINNN\fR stands for a three-digit +octal number is replaced with ASCII character whose code is \fINNN\fR. +.PP +The \fBWRDSF_ESCAPE\fR flag allows the caller to customize escape +sequences. If it is set, the \fBws_escape\fR member must be +initialized. This member provides escape tables for unquoted words +(\fBws_escape[0]\fR) and quoted strings (\fBws_escape[1]\fR). Each +table is a string consisting of an even number of characters. In each +pair of characters, the first one is a character that can appear after +backslash, and the following one is its translation. For example, the +above table of C escapes is represented as +\fB\(dqa\\ab\\bf\\fn\\nr\\rt\\tv\\v\(dq\fR. +.PP +It is valid to initialize \fBws_escape\fR elements to zero. In this +case, no backslash translation occurs. +.PP +The handling of octal and hex escapes is controlled by the following +bits in \fBws_options\fR: +.TP +.B WRDSO_BSKEEP_WORD +When an unrecognized escape sequence is encountered in a word, +preserve it on output. If that bit is not set, the backslash is +removed from such sequences. 
+.TP +.B WRDSO_OESC_QUOTE +Handle octal escapes in doubly-quoted strings. +.TP +.B WRDSO_XESC_QUOTE +Handle hex escapes in doubly-quoted strings. .SS Command substitution -.SS Path expansion +During \fIcommand substitution\fR, each word is scanned for commands. +Each command found is executed and replaced by the output it creates. +.PP +The syntax is: +.PP +.RS +4 +.BI $( command ) +.RE +.PP +Command substitutions may be nested. +.PP +Unless the substitution appears within double quotes, word splitting and +pathname expansion are performed on its result. +.PP +To enable command substitution, the caller must initialize the +.I ws_command +member with the address of the substitution function and make sure the +.B WRDSF_NOCMD +flag is not set. +.PP +The substitution function should be defined as follows: +.PP +.RS +4 +\fBint \fIcommand\fB\ + (char **\fIret\fB,\ + const char *\fIcmd\fB,\ + size_t \fIlen\fB,\ + char **\fIargv\fB,\ + void *\fIclos\fB);\fR +.RE +.PP +The first \fIlen\fR bytes of \fIcmd\fR contain the command invocation as +it appeared between +.BR $( and ), +with all expansions performed. If the +.I WRDSO_ARGV +option is set, the parameter \fIargv\fR contains the command line split into +words using the same settings as the input \fIws\fR structure. +Otherwise, \fIargv\fR is \fBNULL\fR. +.PP +The \fIclos\fR parameter supplies user-specific data, passed in the +\fIws_closure\fR member. +.PP +On success, the function stores a pointer to the +output string in the memory location pointed to by \fIret\fR and +returns \fBWRDSE_OK\fR (\fB0\fR). On error, it must return one of the +error codes described in the section +.BR "ERROR CODES" . +If +.B WRDSE_USERERR +is returned, a pointer to the error description string must be stored in +.BR *ret . +.PP +When \fBWRDSE_OK\fR or \fBWRDSE_USERERR\fR is returned, the +data stored in \fB*ret\fR must be allocated using +.BR malloc (3). 
+.SS Pathname expansion +Pathname expansion is performed if the \fBWRDSF_PATHEXPAND\fR flag is +set. Each unquoted word is scanned for characters +.BR * , ? ", and " [ . +If one of these appears, the word is considered a \fIpattern\fR (in +the sense of +.BR glob (3)) +and is replaced with an alphabetically sorted list of file names matching the +pattern. +.PP +If no matches are found for a word +and the \fIws_options\fR member has the +.B WRDSO_NULLGLOB +bit set, the word is removed. +.PP +If the \fBWRDSO_FAILGLOB\fR option is set, an error message is output +for each such word using +.IR ws_error . +.PP +When matching a pattern, the dot at the start of a name or immediately +following a slash must be matched explicitly, unless +the \fBWRDSO_DOTGLOB\fR option is set. .SH WORDSPLIT_T STRUCTURE The data type \fBwordsplit_t\fR has three members that contain output data upon return from \fBwordsplit\fR or \fBwordsplit_len\fR, @@ -264,8 +436,15 @@ If initialized on input, the .B WRDSF_COMMENT flag must be set. By default, it's value is \fB\(dq#\(dq\fR. .TP -.BI "const char *" ws_escape -Characters to be escaped with backslash. The +.BI "const char *" ws_escape [2] +Escape tables for unquoted words (\fBws_escape[0]\fR) and quoted +strings (\fBws_escape[1]\fR). These are used to translate escape +sequences (\fB\\\fIC\fR) into characters. Each table is a string +consisting of an even number of characters. In each pair of characters, +the first one is a character that can appear after backslash, and the +following one is its representation. For example, the string +\fB\(dqt\\tn\\n\(dq\fR translates \fB\\t\fR into the horizontal +tabulation character and \fB\\n\fR into newline. The .B WRDSF_ESCAPE flag must be set if this member is initialized. .TP @@ -367,7 +546,7 @@ flag must be set. const char *cmd,\ size_t len,\ char **argv,\ - void *clos) + void *clos)\fR Pointer to the function that performs command substitution. 
It treats the first \fIlen\fR bytes of the string \fIcmd\fR as a command (whatever it means for the caller) and attempts to execute it. On @@ -376,7 +555,7 @@ in the memory location pointed to by \fBret\fR and \fB0\fR is returned. On error, the function must return one of the error codes described in the section .BR "ERROR CODES" . -If \fIws_getvar\fR returns +If \fIws_command\fR returns .BR WRDSE_USERERR , it must store the pointer to the error description string in .BR *ret . @@ -555,7 +734,102 @@ for details. The .I ws_options member is initialized. +.SH OPTIONS +The +.I ws_options +member is consulted if the +.B WRDSF_OPTIONS +flag is set. It contains a bitwise \fBOR\fR of one or more of the +following options: +.TP +.B WRDSO_NULLGLOB +Remove the words that produce empty string after pathname expansion. +.TP +.B WRDSO_FAILGLOB +Output error message if pathname expansion produces empty string. +.TP +.B WRDSO_DOTGLOB +During pathname expansion allow a leading period to be matched by +metacharacters. +.TP +.B WRDSO_ARGV +Split command invocation into words and pass the result to the +\fIws_command\fR function in \fIargv\fR parameter. +.PP +.TP +.B WRDSO_BSKEEP_WORD +Quote removal: when an unrecognized escape sequence is encountered in a word, +preserve it on output. If that bit is not set, the backslash is +removed from such sequences. +.TP +.B WRDSO_OESC_WORD +Quote removal: handle octal escapes in words. +.TP +.B WRDSO_XESC_WORD +Quote removal: handle hex escapes in words. +.TP +.B WRDSO_BSKEEP_QUOTE +Quote removal: when an unrecognized escape sequence is encountered in +a doubly-quoted string, preserve it on output. If that bit is not +set, the backslash is removed from such sequences. +.TP +.B WRDSO_OESC_QUOTE +Quote removal: handle octal escapes in doubly-quoted strings. +.TP +.B WRDSO_XESC_QUOTE +Quote removal: handle hex escapes in doubly-quoted strings. +.SH "ERROR CODES" +.TP +.BR WRDSE_OK ", " WRDSE_EOF +Successful return. 
+.TP +.B WRDSE_QUOTE +Missing closing quote. The \fIws_endp\fR member points to the position in +the input string where the error occurred. +.TP +.B WRDSE_NOSPACE +Memory exhausted. +.TP +.B WRDSE_USAGE +Invalid wordsplit usage. +.TP +.B WRDSE_CBRACE +Unbalanced curly brace. +.TP +.B WRDSE_UNDEF +Undefined variable. This error is returned only if the +\fBWRDSF_UNDEF\fR flag is set. +.TP +.B WRDSE_NOINPUT +Input exhausted. This is not actually an error. This code is returned +if \fBwordsplit\fR (or \fBwordsplit_len\fR) is invoked in incremental +mode and encounters the end of the input string. See the section +.BR "INCREMENTAL MODE" . +.TP +.B WRDSE_PAREN +Unbalanced parenthesis. +.TP +.B WRDSE_GLOBERR +An error occurred during pattern matching. +.TP +.B WRDSE_USERERR +User-defined error. Normally it is returned by \fBws_getvar\fR or +\fBws_command\fR. Use the function +.B wordsplit_strerror +to get a textual description of the error. .SH "RETURN VALUE" +Both +.B wordsplit +and +.B wordsplit_len +return \fB0\fR on success, and a non-zero error code on +error (see the section +.BR "ERROR CODES" ). +.PP +.B wordsplit_strerror +returns a pointer to the constant string describing the last error +condition that occurred in +.IR ws . 
.SH EXAMPLE .SH "SEE ALSO" .SH AUTHORS diff --git a/src/wordsplit.c b/src/wordsplit.c index c726239..671fcb9 100644 --- a/src/wordsplit.c +++ b/src/wordsplit.c @@ -176,6 +176,8 @@ wordsplit_init0 (struct wordsplit *wsp) wsp->ws_head = wsp->ws_tail = NULL; } +char wordsplit_c_escape_tab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v"; + static int wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, int flags) @@ -234,6 +236,30 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, if (!(wsp->ws_flags & WRDSF_OPTIONS)) wsp->ws_options = 0; + + if (wsp->ws_flags & WRDSF_ESCAPE) + { + if (!wsp->ws_escape[0]) + wsp->ws_escape[0] = ""; + if (!wsp->ws_escape[1]) + wsp->ws_escape[1] = ""; + } + else + { + if (wsp->ws_flags & WRDSF_CESCAPES) + { + wsp->ws_escape[0] = wordsplit_c_escape_tab; + wsp->ws_escape[1] = wordsplit_c_escape_tab; + wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD + | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD; + } + else + { + wsp->ws_escape[0] = ""; + wsp->ws_escape[1] = "\\\\\"\""; + wsp->ws_options |= WRDSO_BSKEEP_QUOTE; + } + } wsp->ws_endp = 0; @@ -551,13 +577,14 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node) return 0; } +static void wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote, + char *dst, const char *src, + size_t n); + static int wsnode_quoteremoval (struct wordsplit *wsp) { struct wordsplit_node *p; - void (*uqfn) (char *, const char *, size_t) = - (wsp->ws_flags & WRDSF_CESCAPES) ? 
- wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy; for (p = wsp->ws_head; p; p = p->next) { @@ -585,11 +612,8 @@ wsnode_quoteremoval (struct wordsplit *wsp) p->flags |= _WSNF_WORD; } - if (wsp->ws_flags & WRDSF_ESCAPE) - wordsplit_general_unquote_copy (p->v.word, str, slen, - wsp->ws_escape); - else - uqfn (p->v.word, str, slen); + wordsplit_string_unquote_copy (wsp, p->flags & _WSNF_QUOTE, + p->v.word, str, slen); } } return 0; @@ -1825,35 +1849,6 @@ scan_word (struct wordsplit *wsp, size_t start) return _WRDS_OK; } -static char quote_transtab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v"; - -int -wordsplit_c_unquote_char (int c) -{ - char *p; - - for (p = quote_transtab; *p; p += 2) - { - if (*p == c) - return p[1]; - } - return c; -} - -int -wordsplit_c_quote_char (int c) -{ - char *p; - - for (p = quote_transtab + sizeof (quote_transtab) - 2; - p > quote_transtab; p -= 2) - { - if (*p == c) - return p[-1]; - } - return -1; -} - #define to_num(c) \ (ISDIGIT(c) ? c - '0' : (ISXDIGIT(c) ? toupper(c) - 'A' + 10 : 255 )) @@ -1894,7 +1889,7 @@ wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote) len += 3; else { - if (wordsplit_c_quote_char (*str) != -1) + if (wordsplit_c_quote_char (*str)) len += 2; else len += 4; @@ -1903,47 +1898,56 @@ wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote) return len; } -void -wordsplit_general_unquote_copy (char *dst, const char *src, size_t n, - const char *escapable) +int +wsplt_unquote_char (const char *transtab, int c) { - int i; - - for (i = 0; i < n;) + while (*transtab && transtab[1]) { - if (src[i] == '\\' && i < n && strchr (escapable, src[i + 1])) - i++; - *dst++ = src[i++]; + if (*transtab++ == c) + return *transtab; + ++transtab; } - *dst = 0; + return 0; } -void -wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n) +int +wsplt_quote_char (const char *transtab, int c) { - int i; - - for (i = 0; i < n;) + for (; *transtab && transtab[1]; transtab += 2) { - if (src[i] == '\\') - 
i++; - *dst++ = src[i++]; + if (transtab[1] == c) + return *transtab; } - *dst = 0; + return 0; +} + +int +wordsplit_c_unquote_char (int c) +{ + return wsplt_unquote_char (wordsplit_c_escape_tab, c); +} + +int +wordsplit_c_quote_char (int c) +{ + return wsplt_quote_char (wordsplit_c_escape_tab, c); } void -wordsplit_c_unquote_copy (char *dst, const char *src, size_t n) +wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote, + char *dst, const char *src, size_t n) { int i = 0; int c; + inquote = !!inquote; while (i < n) { if (src[i] == '\\') { ++i; - if (src[i] == 'x' || src[i] == 'X') + if (WRDSO_ESC_TEST (ws, inquote, WRDSO_XESC) + && (src[i] == 'x' || src[i] == 'X')) { if (n - i < 2) { @@ -1966,7 +1970,8 @@ wordsplit_c_unquote_copy (char *dst, const char *src, size_t n) } } } - else if ((unsigned char) src[i] < 128 && ISDIGIT (src[i])) + else if (WRDSO_ESC_TEST (ws, inquote, WRDSO_OESC) + && (unsigned char) src[i] < 128 && ISDIGIT (src[i])) { if (n - i < 1) { @@ -1988,8 +1993,17 @@ wordsplit_c_unquote_copy (char *dst, const char *src, size_t n) } } } + else if ((c = wsplt_unquote_char (ws->ws_escape[inquote], src[i]))) + { + *dst++ = c; + ++i; + } else - *dst++ = wordsplit_c_unquote_char (src[i++]); + { + if (WRDSO_ESC_TEST (ws, inquote, WRDSO_BSKEEP)) + *dst++ = '\\'; + *dst++ = src[i++]; + } } else *dst++ = src[i++]; @@ -2023,7 +2037,7 @@ wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex) { int c = wordsplit_c_quote_char (*src); *dst++ = '\\'; - if (c != -1) + if (c) *dst++ = c; else { diff --git a/src/wordsplit.h b/src/wordsplit.h index 3c1d533..5f36b1e 100644 --- a/src/wordsplit.h +++ b/src/wordsplit.h @@ -43,7 +43,7 @@ struct wordsplit Additional options. */ const char *ws_delim; /* [Input] (WRDSF_DELIM) Word delimiters. */ const char *ws_comment; /* [Input] (WRDSF_COMMENT) Comment characters. 
*/ - const char *ws_escape; /* [Input] (WRDSF_ESCAPE) Characters to be escaped + const char *ws_escape[2]; /* [Input] (WRDSF_ESCAPE) Characters to be escaped with backslash. */ void (*ws_alloc_die) (wordsplit_t *wsp); /* [Input] (WRDSF_ALLOC_DIE) Function called when @@ -184,13 +184,35 @@ struct wordsplit WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_CESCAPES) /* Remove the word that produces empty string after path expansion */ -#define WRDSO_NULLGLOB 0x01 +#define WRDSO_NULLGLOB 0x00000001 /* Print error message if path expansion produces empty string */ -#define WRDSO_FAILGLOB 0x02 +#define WRDSO_FAILGLOB 0x00000002 /* Allow a leading period to be matched by metacharacters. */ -#define WRDSO_DOTGLOB 0x04 +#define WRDSO_DOTGLOB 0x00000004 /* ws_command needs argv parameter */ -#define WRDSO_ARGV 0x08 +#define WRDSO_ARGV 0x00000008 +/* Keep backslash in unrecognized escape sequences in words */ +#define WRDSO_BSKEEP_WORD 0x00000010 +/* Handle octal escapes in words */ +#define WRDSO_OESC_WORD 0x00000020 +/* Handle hex escapes in words */ +#define WRDSO_XESC_WORD 0x00000040 + +/* Keep backslash in unrecognized escape sequences in quoted strings */ +#define WRDSO_BSKEEP_QUOTE 0x00000100 +/* Handle octal escapes in quoted strings */ +#define WRDSO_OESC_QUOTE 0x00000200 +/* Handle hex escapes in quoted strings */ +#define WRDSO_XESC_QUOTE 0x00000400 + +#define WRDSO_BSKEEP WRDSO_BSKEEP_WORD +#define WRDSO_OESC WRDSO_OESC_WORD +#define WRDSO_XESC WRDSO_XESC_WORD + +/* Set escape option F in WS for words (Q==0) or quoted strings (Q==1) */ +#define WRDSO_ESC_SET(ws,q,f) ((ws)->ws_options |= ((f) << 4*(q))) +/* Test WS for escape option F for words (Q==0) or quoted strings (Q==1) */ +#define WRDSO_ESC_TEST(ws,q,f) ((ws)->ws_options & ((f) << 4*(q))) #define WRDSE_OK 0 #define WRDSE_EOF WRDSE_OK @@ -215,10 +237,6 @@ int wordsplit_c_unquote_char (int c); int wordsplit_c_quote_char (int c); size_t wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote); -void 
wordsplit_general_unquote_copy (char *dst, const char *src, size_t n, - const char *escapable); -void wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n); -void wordsplit_c_unquote_copy (char *dst, const char *src, size_t n); void wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex); void wordsplit_perror (wordsplit_t *ws); diff --git a/tests/wordsplit.at b/tests/wordsplit.at index eea5c0a..c466a07 100644 --- a/tests/wordsplit.at +++ b/tests/wordsplit.at @@ -362,8 +362,10 @@ TESTWSP([suppress ws trimming within quotes], 4: "formatfield=In message %{text}, " ]) +# FIXME: numbering TESTWSP([unescape], -[wsp-unescape wsp33],[-default novar nocmd quote escape '\"'], +[wsp-unescape wsp-unescape-simple wsp33], +[-default novar nocmd quote escape ':+:\\""'], [\Seen "quote \"" "bs \\"], [NF: 3 0: \\Seen @@ -371,6 +373,21 @@ TESTWSP([unescape], 2: "bs \\" ]) +TESTWSP([unescape: word/quote], +[wsp-unescape wsp-unescape-word wsp33], +[-default novar nocmd quote escape-word '\\""' escape-quote ':+0x:\\""'], +[\Seen "quote \"" "bs \\" "3\x31 \101" 3\x31 \101], +[NF: 6 +0: Seen +1: "quote \"" +2: "bs \\" +3: "31 A" +4: 3x31 +5: 101 +]) + +# END FIXME + TESTWSP([dquote],[wsp34],[-default novar nocmd dquote], [a "quoted example" isn't it], [NF: 4 diff --git a/tests/wsp.c b/tests/wsp.c index 58b2cee..1f97b5b 100644 --- a/tests/wsp.c +++ b/tests/wsp.c @@ -106,6 +106,8 @@ help () printf (" -%s\n", string_keytab[i].name); printf (" %s ARG\n", string_keytab[i].name); } + printf (" escape-word ARG\n"); + printf (" escape-quote ARG\n"); putchar ('\n'); for (i = 0; opt_keytab[i].name; i++) { @@ -281,6 +283,41 @@ struct kwd env_keytab[] = { { NULL } }; +static void +set_escape_string (wordsplit_t *ws, int *wsflags, int q, const char *str) +{ + if (*str == ':') + { + while (*++str != ':') + { + int f; + switch (*str) + { + case '+': + f = WRDSO_BSKEEP; + break; + + case '0': + f = WRDSO_OESC; + break; + + case 'x': + f = WRDSO_XESC; + break; + + default: + fprintf 
(stderr, "%s: invalid escape flag near %s\n", + progname, str); + abort (); + } + WRDSO_ESC_SET (ws, q, f); + } + *wsflags |= WRDSF_OPTIONS; + ++str; + } + ws->ws_escape[q] = str; +} + int main (int argc, char **argv) { @@ -397,7 +434,8 @@ main (int argc, char **argv) break; case WRDSF_ESCAPE: - ws.ws_escape = argv[i]; + set_escape_string (&ws, &wsflags, 0, argv[i]); + set_escape_string (&ws, &wsflags, 1, argv[i]); break; } @@ -406,6 +444,27 @@ main (int argc, char **argv) continue; } + if (strcmp (opt, "escape-word") == 0 + || strcmp (opt, "escape-quote") == 0) + { + int q = opt[7] == 'q'; + + i++; + if (i == argc) + { + fprintf (stderr, "%s: missing argument for %s\n", + progname, opt); + exit (1); + } + if (!(wsflags & WRDSF_ESCAPE)) + { + wsflags |= WRDSF_ESCAPE; + ws.ws_escape[!q] = NULL; + } + set_escape_string (&ws, &wsflags, q, argv[i]); + continue; + } + if (strcmp (opt, "dooffs") == 0) { if (negate) |