aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org.ua>2014-10-30 16:58:00 +0200
committerSergey Poznyakoff <gray@gnu.org.ua>2015-12-17 15:26:28 +0200
commit943d725e7f9ab2356d79d3d94261bed81162b959 (patch)
tree1f2f407657cc1c3c98bbc4b9fa7db0d29ef3fe51
parent9bebcfbc1cbe41dfc4ab3b1cffab031f24c29720 (diff)
downloadwordsplit-943d725e7f9ab2356d79d3d94261bed81162b959.tar.gz
wordsplit-943d725e7f9ab2356d79d3d94261bed81162b959.tar.bz2
Finish wordsplit docs, improve tests
-rw-r--r--doc/wordsplit.3298
-rw-r--r--src/wordsplit.c138
-rw-r--r--src/wordsplit.h36
-rw-r--r--tests/wordsplit.at19
-rw-r--r--tests/wsp.c61
5 files changed, 467 insertions, 85 deletions
diff --git a/doc/wordsplit.3 b/doc/wordsplit.3
index 2f0cced..123bfb7 100644
--- a/doc/wordsplit.3
+++ b/doc/wordsplit.3
@@ -14,7 +14,7 @@
.\" You should have received a copy of the GNU General Public License
.\" along with Grecs. If not, see <http://www.gnu.org/licenses/>.
.\"
-.TH WORDSPLIT 3 "October 28, 2014" "GRECS" "Grecs User Reference"
+.TH WORDSPLIT 3 "October 30, 2014" "GRECS" "Grecs User Reference"
.SH NAME
wordsplit \- split string into words
.SH SYNOPSIS
@@ -39,8 +39,7 @@ wordsplit \- split string into words
\fBvoid wordsplit_clearerr (wordsplit_t *\fIws\fB);\fR
.SH DESCRIPTION
The function \fBwordsplit\fR splits the string \fIs\fR into words
-using a set of rules governed by \fIflags\fR and stores the result
-in the memory location pointed to by \fIws\fR. Depending on
+using a set of rules governed by \fIflags\fR. Depending on
\fIflags\fR, the function performs the following: whitespace trimming,
tilde expansion, variable expansion, quote removal, command
substitution, and path expansion. On success, the function returns 0
@@ -96,11 +95,39 @@ not try to alter or deallocate it.
The function
.B wordsplit_clearerr
clears the error condition associated with \fIws\fR.
+.SH INCREMENTAL MODE
+In incremental mode \fBwordsplit\fR parses one word per invocation.
+It returns \fBWRDSE_OK\fR on success and \fBWRDSE_NOINPUT\fR when it
+has processed the entire input string.
+.PP
+This mode is enabled if the flag \fBWRDSF_INCREMENTAL\fR is set in
+the \fIflags\fR argument. Subsequent calls to \fBwordsplit\fR must
+have \fBNULL\fR as first argument. Each successful
+call will return exactly one word in \fBws.ws_wordv[0]\fR.
+.PP
+An example usage:
+.PP
+.EX
+wordsplit_t ws;
+int rc;
+int flags = WRDSF_DEFFLAGS|WRDSF_INCREMENTAL;
+
+for (rc = wordsplit(s, &ws, flags); rc == WRDSE_OK;
+ rc = wordsplit(NULL, &ws, flags)) {
+ process(ws.ws_wordv[0]);
+}
+
+if (rc != WRDSE_NOINPUT)
+ wordsplit_perror(&ws);
+
+wordsplit_free(&ws);
+.EE
.SH EXPANSION
-The number of expansions performed on the input is controlled by
-appropriate bits set in the \fIflags\fR argument. Whatever expansions
-are enabled, they are always run in the same order as described in this
-section.
+Expansion is performed on the input after it has been split into
+words. There are several kinds of expansion; which of them are
+performed is controlled by appropriate bits set in the \fIflags\fR
+argument. Whatever expansion kinds are enabled, they are always run
+in the same order as described in this section.
.SS Whitespace trimming
Whitespace trimming removes any leading and trailing whitespace from
the initial word array. It is enabled by the
@@ -206,8 +233,153 @@ Otherwise, the value of \fIvariable\fR is substituted.
If \fIvariable\fR is null or unset, nothing is substituted, otherwise the
expansion of \fIword\fR is substituted.
.SS Quote removal
+Quote removal translates unquoted escape sequences into corresponding bytes.
+An escape sequence is a backslash followed by one or more characters. By
+default, each sequence \fB\\\fIC\fR appearing in unquoted words is
+replaced with the character \fIC\fR. In doubly-quoted strings, two
+backslash sequences are recognized: \fB\\\\\fR translates to a single
+backslash, and \fB\\\(dq\fR translates to a double-quote.
+.PP
+Two flags are provided to modify this behavior. If
+.I WRDSF_CESCAPES
+flag is set, the following escape sequences are recognized:
+.sp
+.nf
+.ta 8n 18n 42n
+.ul
+ Sequence Expansion ASCII
+ \fB\\\\\fR \fB\\\fR 134
+ \fB\\\(dq\fR \fB\(dq\fR 042
+ \fB\\a\fR audible bell 007
+ \fB\\b\fR backspace 010
+ \fB\\f\fR form-feed 014
+ \fB\\n\fR new line 012
+ \fB\\r\fR carriage return 015
+ \fB\\t\fR horizontal tabulation 011
+ \fB\\v\fR vertical tabulation 013
+.fi
+.sp
+The sequence \fB\\x\fINN\fR or \fB\\X\fINN\fR, where \fINN\fR stands
+for a two-digit hex number is replaced with ASCII character \fINN\fR.
+The sequence \fB\\0\fINNN\fR, where \fINNN\fR stands for a three-digit
+octal number is replaced with ASCII character whose code is \fINNN\fR.
+.PP
+The \fBWRDSF_ESCAPE\fR flag allows the caller to customize escape
+sequences. If it is set, the \fBws_escape\fR member must be
+initialized. This member provides escape tables for unquoted words
+(\fBws_escape[0]\fR) and quoted strings (\fBws_escape[1]\fR). Each
+table is a string consisting of an even number of characters. In each
+pair of characters, the first one is a character that can appear after
+backslash, and the following one is its translation. For example, the
+above table of C escapes is represented as
+\fB\(dqa\\ab\\bf\\fn\\nr\\rt\\tv\\v\(dq\fR.
+.PP
+It is valid to initialize \fBws_escape\fR elements to zero. In this
+case, no backslash translation occurs.
+.PP
+The handling of octal and hex escapes is controlled by the following
+bits in \fBws_options\fR:
+.TP
+.B WRDSO_BSKEEP_WORD
+When an unrecognized escape sequence is encountered in a word,
+preserve it on output. If that bit is not set, the backslash is
+removed from such sequences.
+.TP
+.B WRDSO_OESC_WORD
+Handle octal escapes in words.
+.TP
+.B WRDSO_XESC_WORD
+Handle hex escapes in words.
+.TP
+.B WRDSO_BSKEEP_QUOTE
+When an unrecognized escape sequence is encountered in a doubly-quoted
+string, preserve it on output. If that bit is not set, the backslash is
+removed from such sequences.
+.TP
+.B WRDSO_OESC_QUOTE
+Handle octal escapes in doubly-quoted strings.
+.TP
+.B WRDSO_XESC_QUOTE
+Handle hex escapes in doubly-quoted strings.
.SS Command substitution
-.SS Path expansion
+During \fIcommand substitution\fR, each word is scanned for commands.
+Each command found is executed and replaced by the output it creates.
+.PP
+The syntax is:
+.PP
+.RS +4
+.BI $( command )
+.RE
+.PP
+Command substitutions may be nested.
+.PP
+Unless the substitution appears within double quotes, word splitting and
+pathname expansion are performed on its result.
+.PP
+To enable command substitution, the caller must initialize the
+.I ws_command
+member with the address of the substitution function and make sure the
+.B WRDSF_NOCMD
+flag is not set.
+.PP
+The substitution function should be defined as follows:
+.PP
+.RS +4
+\fBint \fIcommand\fB\
+ (char **\fIret\fB,\
+ const char *\fIcmd\fB,\
+ size_t \fIlen,\fB\
+ char **\fIargv\fB,\
+ void *\fIclos\fB);\fR
+.RE
+.PP
+First \fIlen\fR bytes of \fIcmd\fR contain the command invocation as
+it appeared between
+.BR $( and ),
+with all expansions performed. If the
+.I WRDSO_ARGV
+option is set, the parameter \fIargv\fR contains the command line split into
+words using the same settings as the input \fIws\fR structure.
+Otherwise, \fIargv\fR is \fBNULL\fR.
+.PP
+The \fIclos\fR parameter supplies user-specific data, passed in the
+\fIws_closure\fR member.
+.PP
+On success, the function stores a pointer to the
+output string in the memory location pointed to by \fIret\fR and
+returns \fBWRDSE_OK\fR (\fB0\fR). On error, it must return one of the
+error codes described in the section
+.BR "ERROR CODES" .
+If
+.B WRDSE_USERERR
+is returned, a pointer to the error description string must be stored in
+.BR *ret .
+.PP
+When \fBWRDSE_OK\fR or \fBWRDSE_USERERR\fR is returned, the
+data stored in \fB*ret\fR must be allocated using
+.BR malloc (3).
+.SS Pathname expansion
+Pathname expansion is performed if the \fBWRDSF_PATHEXPAND\fR flag is
+set. Each unquoted word is scanned for characters
+.BR * , ? ", and " [ .
+If one of these appears, the word is considered a \fIpattern\fR (in
+the sense of
+.BR glob (3))
+and is replaced with an alphabetically sorted list of file names matching the
+pattern.
+.PP
+If no matches are found for a word
+and the \fIws_options\fR member has the
+.B WRDSO_NULLGLOB
+bit set, the word is removed.
+.PP
+If the \fBWRDSO_FAILGLOB\fR option is set, an error message is output
+for each such word using
+.IR ws_error .
+.PP
+When matching a pattern, the dot at the start of a name or immediately
+following a slash must be matched explicitly, unless
+the \fBWRDSO_DOTGLOB\fR option is set.
.SH WORDSPLIT_T STRUCTURE
The data type \fBwordsplit_t\fR has three members that contain
output data upon return from \fBwordsplit\fR or \fBwordsplit_len\fR,
@@ -264,8 +436,15 @@ If initialized on input, the
.B WRDSF_COMMENT
flag must be set. By default, its value is \fB\(dq#\(dq\fR.
.TP
-.BI "const char *" ws_escape
-Characters to be escaped with backslash. The
+.BI "const char *" ws_escape [2]
+Escape tables for unquoted words (\fBws_escape[0]\fR) and quoted
+strings (\fBws_escape[1]\fR). These are used to translate escape
+sequences (\fB\\\fIC\fR) into characters. Each table is a string
+consisting of an even number of characters. In each pair of characters,
+the first one is a character that can appear after backslash, and the
+following one is its representation. For example, the string
+\fB\(dqt\\tn\\n\(dq\fR translates \fB\\t\fR into the horizontal
+tabulation character and \fB\\n\fR into newline. The
.B WRDSF_ESCAPE
flag must be set if this member is initialized.
.TP
@@ -367,7 +546,7 @@ flag must be set.
const char *cmd,\
size_t len,\
char **argv,\
- void *clos)
+ void *clos)\fR
Pointer to the function that performs command substitution. It treats
the first \fIlen\fR bytes of the string \fIcmd\fR as a command
(whatever it means for the caller) and attempts to execute it. On
@@ -376,7 +555,7 @@ in the memory location pointed to by \fBret\fR and \fB0\fR is
returned. On error,
the function must return one of the error codes described in the section
.BR "ERROR CODES" .
-If \fIws_getvar\fR returns
+If \fIws_command\fR returns
.BR WRDSE_USERERR ,
it must store the pointer to the error description string in
.BR *ret .
@@ -555,7 +734,102 @@ for details.
The
.I ws_options
member is initialized.
+.SH OPTIONS
+The
+.I ws_options
+member is consulted if the
+.B WRDSF_OPTIONS
+flag is set. It contains a bitwise \fBOR\fR of one or more of the
+following options:
+.TP
+.B WRDSO_NULLGLOB
+Remove the words that produce empty string after pathname expansion.
+.TP
+.B WRDSO_FAILGLOB
+Output error message if pathname expansion produces empty string.
+.TP
+.B WRDSO_DOTGLOB
+During pathname expansion allow a leading period to be matched by
+metacharacters.
+.TP
+.B WRDSO_ARGV
+Split command invocation into words and pass the result to the
+\fIws_command\fR function in \fIargv\fR parameter.
+.PP
+.TP
+.B WRDSO_BSKEEP_WORD
+Quote removal: when an unrecognized escape sequence is encountered in a word,
+preserve it on output. If that bit is not set, the backslash is
+removed from such sequences.
+.TP
+.B WRDSO_OESC_WORD
+Quote removal: handle octal escapes in words.
+.TP
+.B WRDSO_XESC_WORD
+Quote removal: handle hex escapes in words.
+.TP
+.B WRDSO_BSKEEP_QUOTE
+Quote removal: when an unrecognized escape sequence is encountered in
+a doubly-quoted string, preserve it on output. If that bit is not
+set, the backslash is removed from such sequences.
+.TP
+.B WRDSO_OESC_QUOTE
+Quote removal: handle octal escapes in doubly-quoted strings.
+.TP
+.B WRDSO_XESC_QUOTE
+Quote removal: handle hex escapes in doubly-quoted strings.
+.SH "ERROR CODES"
+.TP
+.BR WRDSE_OK ", " WRDSE_EOF
+Successful return.
+.TP
+.B WRDSE_QUOTE
+Missing closing quote. The \fIws_endp\fR points to the position in
+the input string where the error occurred.
+.TP
+.B WRDSE_NOSPACE
+Memory exhausted.
+.TP
+.B WRDSE_USAGE
+Invalid wordsplit usage.
+.TP
+.B WRDSE_CBRACE
+Unbalanced curly brace.
+.TP
+.B WRDSE_UNDEF
+Undefined variable. This error is returned only if the
+\fBWRDSF_UNDEF\fR flag is set.
+.TP
+.B WRDSE_NOINPUT
+Input exhausted. This is not actually an error. This code is returned
+if \fBwordsplit\fR (or \fBwordsplit_len\fR) is invoked in incremental
+mode and encounters end of input string. See the section
+.BR "INCREMENTAL MODE" .
+.TP
+.B WRDSE_PAREN
+Unbalanced parenthesis.
+.TP
+.B WRDSE_GLOBERR
+An error occurred during pattern matching.
+.TP
+.B WRDSE_USERERR
+User-defined error. Normally it is returned by \fBws_getvar\fR or
+\fBws_command\fR. Use the function
+.B wordsplit_strerror
+to get textual description of the error.
.SH "RETURN VALUE"
+Both
+.B wordsplit
+and
+.B wordsplit_len
+return \fB0\fR on success, and a non-zero error code on
+error (see the section
+.BR "ERROR CODES" ).
+.PP
+.B wordsplit_strerror
+returns a pointer to the constant string describing the last error
+condition that occurred in
+.IR ws .
.SH EXAMPLE
.SH "SEE ALSO"
.SH AUTHORS
diff --git a/src/wordsplit.c b/src/wordsplit.c
index c726239..671fcb9 100644
--- a/src/wordsplit.c
+++ b/src/wordsplit.c
@@ -176,6 +176,8 @@ wordsplit_init0 (struct wordsplit *wsp)
wsp->ws_head = wsp->ws_tail = NULL;
}
+char wordsplit_c_escape_tab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";
+
static int
wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
int flags)
@@ -234,6 +236,30 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
if (!(wsp->ws_flags & WRDSF_OPTIONS))
wsp->ws_options = 0;
+
+ if (wsp->ws_flags & WRDSF_ESCAPE)
+ {
+ if (!wsp->ws_escape[0])
+ wsp->ws_escape[0] = "";
+ if (!wsp->ws_escape[1])
+ wsp->ws_escape[1] = "";
+ }
+ else
+ {
+ if (wsp->ws_flags & WRDSF_CESCAPES)
+ {
+ wsp->ws_escape[0] = wordsplit_c_escape_tab;
+ wsp->ws_escape[1] = wordsplit_c_escape_tab;
+ wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD
+ | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD;
+ }
+ else
+ {
+ wsp->ws_escape[0] = "";
+ wsp->ws_escape[1] = "\\\\\"\"";
+ wsp->ws_options |= WRDSO_BSKEEP_QUOTE;
+ }
+ }
wsp->ws_endp = 0;
@@ -551,13 +577,14 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
return 0;
}
+static void wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
+ char *dst, const char *src,
+ size_t n);
+
static int
wsnode_quoteremoval (struct wordsplit *wsp)
{
struct wordsplit_node *p;
- void (*uqfn) (char *, const char *, size_t) =
- (wsp->ws_flags & WRDSF_CESCAPES) ?
- wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy;
for (p = wsp->ws_head; p; p = p->next)
{
@@ -585,11 +612,8 @@ wsnode_quoteremoval (struct wordsplit *wsp)
p->flags |= _WSNF_WORD;
}
- if (wsp->ws_flags & WRDSF_ESCAPE)
- wordsplit_general_unquote_copy (p->v.word, str, slen,
- wsp->ws_escape);
- else
- uqfn (p->v.word, str, slen);
+ wordsplit_string_unquote_copy (wsp, p->flags & _WSNF_QUOTE,
+ p->v.word, str, slen);
}
}
return 0;
@@ -1825,35 +1849,6 @@ scan_word (struct wordsplit *wsp, size_t start)
return _WRDS_OK;
}
-static char quote_transtab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";
-
-int
-wordsplit_c_unquote_char (int c)
-{
- char *p;
-
- for (p = quote_transtab; *p; p += 2)
- {
- if (*p == c)
- return p[1];
- }
- return c;
-}
-
-int
-wordsplit_c_quote_char (int c)
-{
- char *p;
-
- for (p = quote_transtab + sizeof (quote_transtab) - 2;
- p > quote_transtab; p -= 2)
- {
- if (*p == c)
- return p[-1];
- }
- return -1;
-}
-
#define to_num(c) \
(ISDIGIT(c) ? c - '0' : (ISXDIGIT(c) ? toupper(c) - 'A' + 10 : 255 ))
@@ -1894,7 +1889,7 @@ wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
len += 3;
else
{
- if (wordsplit_c_quote_char (*str) != -1)
+ if (wordsplit_c_quote_char (*str))
len += 2;
else
len += 4;
@@ -1903,47 +1898,56 @@ wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
return len;
}
-void
-wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
- const char *escapable)
+int
+wsplt_unquote_char (const char *transtab, int c)
{
- int i;
-
- for (i = 0; i < n;)
+ while (*transtab && transtab[1])
{
- if (src[i] == '\\' && i < n && strchr (escapable, src[i + 1]))
- i++;
- *dst++ = src[i++];
+ if (*transtab++ == c)
+ return *transtab;
+ ++transtab;
}
- *dst = 0;
+ return 0;
}
-void
-wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n)
+int
+wsplt_quote_char (const char *transtab, int c)
{
- int i;
-
- for (i = 0; i < n;)
+ for (; *transtab && transtab[1]; transtab += 2)
{
- if (src[i] == '\\')
- i++;
- *dst++ = src[i++];
+ if (transtab[1] == c)
+ return *transtab;
}
- *dst = 0;
+ return 0;
+}
+
+int
+wordsplit_c_unquote_char (int c)
+{
+ return wsplt_unquote_char (wordsplit_c_escape_tab, c);
+}
+
+int
+wordsplit_c_quote_char (int c)
+{
+ return wsplt_quote_char (wordsplit_c_escape_tab, c);
}
void
-wordsplit_c_unquote_copy (char *dst, const char *src, size_t n)
+wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
+ char *dst, const char *src, size_t n)
{
int i = 0;
int c;
+ inquote = !!inquote;
while (i < n)
{
if (src[i] == '\\')
{
++i;
- if (src[i] == 'x' || src[i] == 'X')
+ if (WRDSO_ESC_TEST (ws, inquote, WRDSO_XESC)
+ && (src[i] == 'x' || src[i] == 'X'))
{
if (n - i < 2)
{
@@ -1966,7 +1970,8 @@ wordsplit_c_unquote_copy (char *dst, const char *src, size_t n)
}
}
}
- else if ((unsigned char) src[i] < 128 && ISDIGIT (src[i]))
+ else if (WRDSO_ESC_TEST (ws, inquote, WRDSO_OESC)
+ && (unsigned char) src[i] < 128 && ISDIGIT (src[i]))
{
if (n - i < 1)
{
@@ -1988,8 +1993,17 @@ wordsplit_c_unquote_copy (char *dst, const char *src, size_t n)
}
}
}
+ else if ((c = wsplt_unquote_char (ws->ws_escape[inquote], src[i])))
+ {
+ *dst++ = c;
+ ++i;
+ }
else
- *dst++ = wordsplit_c_unquote_char (src[i++]);
+ {
+ if (WRDSO_ESC_TEST (ws, inquote, WRDSO_BSKEEP))
+ *dst++ = '\\';
+ *dst++ = src[i++];
+ }
}
else
*dst++ = src[i++];
@@ -2023,7 +2037,7 @@ wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
{
int c = wordsplit_c_quote_char (*src);
*dst++ = '\\';
- if (c != -1)
+ if (c)
*dst++ = c;
else
{
diff --git a/src/wordsplit.h b/src/wordsplit.h
index 3c1d533..5f36b1e 100644
--- a/src/wordsplit.h
+++ b/src/wordsplit.h
@@ -43,7 +43,7 @@ struct wordsplit
Additional options. */
const char *ws_delim; /* [Input] (WRDSF_DELIM) Word delimiters. */
const char *ws_comment; /* [Input] (WRDSF_COMMENT) Comment characters. */
- const char *ws_escape; /* [Input] (WRDSF_ESCAPE) Characters to be escaped
+ const char *ws_escape[2]; /* [Input] (WRDSF_ESCAPE) Characters to be escaped
with backslash. */
void (*ws_alloc_die) (wordsplit_t *wsp);
/* [Input] (WRDSF_ALLOC_DIE) Function called when
@@ -184,13 +184,35 @@ struct wordsplit
WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_CESCAPES)
/* Remove the word that produces empty string after path expansion */
-#define WRDSO_NULLGLOB 0x01
+#define WRDSO_NULLGLOB 0x00000001
/* Print error message if path expansion produces empty string */
-#define WRDSO_FAILGLOB 0x02
+#define WRDSO_FAILGLOB 0x00000002
/* Allow a leading period to be matched by metacharacters. */
-#define WRDSO_DOTGLOB 0x04
+#define WRDSO_DOTGLOB 0x00000004
/* ws_command needs argv parameter */
-#define WRDSO_ARGV 0x08
+#define WRDSO_ARGV 0x00000008
+/* Keep backslash in unrecognized escape sequences in words */
+#define WRDSO_BSKEEP_WORD 0x00000010
+/* Handle octal escapes in words */
+#define WRDSO_OESC_WORD 0x00000020
+/* Handle hex escapes in words */
+#define WRDSO_XESC_WORD 0x00000040
+
+/* Keep backslash in unrecognized escape sequences in quoted strings */
+#define WRDSO_BSKEEP_QUOTE 0x00000100
+/* Handle octal escapes in quoted strings */
+#define WRDSO_OESC_QUOTE 0x00000200
+/* Handle hex escapes in quoted strings */
+#define WRDSO_XESC_QUOTE 0x00000400
+
+#define WRDSO_BSKEEP WRDSO_BSKEEP_WORD
+#define WRDSO_OESC WRDSO_OESC_WORD
+#define WRDSO_XESC WRDSO_XESC_WORD
+
+/* Set escape option F in WS for words (Q==0) or quoted strings (Q==1) */
+#define WRDSO_ESC_SET(ws,q,f) ((ws)->ws_options |= ((f) << 4*(q)))
+/* Test WS for escape option F for words (Q==0) or quoted strings (Q==1) */
+#define WRDSO_ESC_TEST(ws,q,f) ((ws)->ws_options & ((f) << 4*(q)))
#define WRDSE_OK 0
#define WRDSE_EOF WRDSE_OK
@@ -215,10 +237,6 @@ int wordsplit_c_unquote_char (int c);
int wordsplit_c_quote_char (int c);
size_t wordsplit_c_quoted_length (const char *str, int quote_hex,
int *quote);
-void wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
- const char *escapable);
-void wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n);
-void wordsplit_c_unquote_copy (char *dst, const char *src, size_t n);
void wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex);
void wordsplit_perror (wordsplit_t *ws);
diff --git a/tests/wordsplit.at b/tests/wordsplit.at
index eea5c0a..c466a07 100644
--- a/tests/wordsplit.at
+++ b/tests/wordsplit.at
@@ -362,8 +362,10 @@ TESTWSP([suppress ws trimming within quotes],
4: "formatfield=In message %{text}, "
])
+# FIXME: numbering
TESTWSP([unescape],
-[wsp-unescape wsp33],[-default novar nocmd quote escape '\"'],
+[wsp-unescape wsp-unescape-simple wsp33],
+[-default novar nocmd quote escape ':+:\\""'],
[\Seen "quote \"" "bs \\"],
[NF: 3
0: \\Seen
@@ -371,6 +373,21 @@ TESTWSP([unescape],
2: "bs \\"
])
+TESTWSP([unescape: word/quote],
+[wsp-unescape wsp-unescape-word wsp33],
+[-default novar nocmd quote escape-word '\\""' escape-quote ':+0x:\\""'],
+[\Seen "quote \"" "bs \\" "3\x31 \101" 3\x31 \101],
+[NF: 6
+0: Seen
+1: "quote \""
+2: "bs \\"
+3: "31 A"
+4: 3x31
+5: 101
+])
+
+# END FIXME
+
TESTWSP([dquote],[wsp34],[-default novar nocmd dquote],
[a "quoted example" isn't it],
[NF: 4
diff --git a/tests/wsp.c b/tests/wsp.c
index 58b2cee..1f97b5b 100644
--- a/tests/wsp.c
+++ b/tests/wsp.c
@@ -106,6 +106,8 @@ help ()
printf (" -%s\n", string_keytab[i].name);
printf (" %s ARG\n", string_keytab[i].name);
}
+ printf (" escape-word ARG\n");
+ printf (" escape-quote ARG\n");
putchar ('\n');
for (i = 0; opt_keytab[i].name; i++)
{
@@ -281,6 +283,41 @@ struct kwd env_keytab[] = {
{ NULL }
};
+static void
+set_escape_string (wordsplit_t *ws, int *wsflags, int q, const char *str)
+{
+ if (*str == ':')
+ {
+ while (*++str != ':')
+ {
+ int f;
+ switch (*str)
+ {
+ case '+':
+ f = WRDSO_BSKEEP;
+ break;
+
+ case '0':
+ f = WRDSO_OESC;
+ break;
+
+ case 'x':
+ f = WRDSO_XESC;
+ break;
+
+ default:
+ fprintf (stderr, "%s: invalid escape flag near %s\n",
+ progname, str);
+ abort ();
+ }
+ WRDSO_ESC_SET (ws, q, f);
+ }
+ *wsflags |= WRDSF_OPTIONS;
+ ++str;
+ }
+ ws->ws_escape[q] = str;
+}
+
int
main (int argc, char **argv)
{
@@ -397,7 +434,8 @@ main (int argc, char **argv)
break;
case WRDSF_ESCAPE:
- ws.ws_escape = argv[i];
+ set_escape_string (&ws, &wsflags, 0, argv[i]);
+ set_escape_string (&ws, &wsflags, 1, argv[i]);
break;
}
@@ -406,6 +444,27 @@ main (int argc, char **argv)
continue;
}
+ if (strcmp (opt, "escape-word") == 0
+ || strcmp (opt, "escape-quote") == 0)
+ {
+ int q = opt[7] == 'q';
+
+ i++;
+ if (i == argc)
+ {
+ fprintf (stderr, "%s: missing argument for %s\n",
+ progname, opt);
+ exit (1);
+ }
+ if (!(wsflags & WRDSF_ESCAPE))
+ {
+ wsflags |= WRDSF_ESCAPE;
+ ws.ws_escape[!q] = NULL;
+ }
+ set_escape_string (&ws, &wsflags, q, argv[i]);
+ continue;
+ }
+
if (strcmp (opt, "dooffs") == 0)
{
if (negate)

Return to:

Send suggestions and report system problems to the System administrator.