summary | refs | log | tree | commit | diff | about
path: root/src
author Sergey Poznyakoff <gray@gnu.org.ua> 2014-10-30 14:58:00 (GMT)
committer Sergey Poznyakoff <gray@gnu.org.ua> 2015-12-17 13:26:28 (GMT)
commit 59d4374b24e9f9f077f2e2e973fa75f3c3d505e0 (patch) (side-by-side diff)
tree 0ac1fef3e04c45554b1f893fbc28a8fc2ba019ac /src
parent 56a02e741cd8d8b9dce27a79ae9bbcaf1713c4f7 (diff)
download grecs-59d4374b24e9f9f077f2e2e973fa75f3c3d505e0.tar.gz
grecs-59d4374b24e9f9f077f2e2e973fa75f3c3d505e0.tar.bz2
Finish wordsplit docs, improve tests
Diffstat (limited to 'src') (more/less context) (ignore whitespace changes)
-rw-r--r-- src/wordsplit.c 138
-rw-r--r-- src/wordsplit.h 36
2 files changed, 103 insertions, 71 deletions
diff --git a/src/wordsplit.c b/src/wordsplit.c
index c726239..671fcb9 100644
--- a/src/wordsplit.c
+++ b/src/wordsplit.c
@@ -176,6 +176,8 @@ wordsplit_init0 (struct wordsplit *wsp)
wsp->ws_head = wsp->ws_tail = NULL;
}
+char wordsplit_c_escape_tab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";
+
static int
wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
int flags)
@@ -234,6 +236,30 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
if (!(wsp->ws_flags & WRDSF_OPTIONS))
wsp->ws_options = 0;
+
+ if (wsp->ws_flags & WRDSF_ESCAPE)
+ {
+ if (!wsp->ws_escape[0])
+ wsp->ws_escape[0] = "";
+ if (!wsp->ws_escape[1])
+ wsp->ws_escape[1] = "";
+ }
+ else
+ {
+ if (wsp->ws_flags & WRDSF_CESCAPES)
+ {
+ wsp->ws_escape[0] = wordsplit_c_escape_tab;
+ wsp->ws_escape[1] = wordsplit_c_escape_tab;
+ wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD
+ | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD;
+ }
+ else
+ {
+ wsp->ws_escape[0] = "";
+ wsp->ws_escape[1] = "\\\\\"\"";
+ wsp->ws_options |= WRDSO_BSKEEP_QUOTE;
+ }
+ }
wsp->ws_endp = 0;
@@ -551,13 +577,14 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
return 0;
}
+static void wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
+ char *dst, const char *src,
+ size_t n);
+
static int
wsnode_quoteremoval (struct wordsplit *wsp)
{
struct wordsplit_node *p;
- void (*uqfn) (char *, const char *, size_t) =
- (wsp->ws_flags & WRDSF_CESCAPES) ?
- wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy;
for (p = wsp->ws_head; p; p = p->next)
{
@@ -585,11 +612,8 @@ wsnode_quoteremoval (struct wordsplit *wsp)
p->flags |= _WSNF_WORD;
}
- if (wsp->ws_flags & WRDSF_ESCAPE)
- wordsplit_general_unquote_copy (p->v.word, str, slen,
- wsp->ws_escape);
- else
- uqfn (p->v.word, str, slen);
+ wordsplit_string_unquote_copy (wsp, p->flags & _WSNF_QUOTE,
+ p->v.word, str, slen);
}
}
return 0;
@@ -1825,35 +1849,6 @@ scan_word (struct wordsplit *wsp, size_t start)
return _WRDS_OK;
}
-static char quote_transtab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";
-
-int
-wordsplit_c_unquote_char (int c)
-{
- char *p;
-
- for (p = quote_transtab; *p; p += 2)
- {
- if (*p == c)
- return p[1];
- }
- return c;
-}
-
-int
-wordsplit_c_quote_char (int c)
-{
- char *p;
-
- for (p = quote_transtab + sizeof (quote_transtab) - 2;
- p > quote_transtab; p -= 2)
- {
- if (*p == c)
- return p[-1];
- }
- return -1;
-}
-
#define to_num(c) \
(ISDIGIT(c) ? c - '0' : (ISXDIGIT(c) ? toupper(c) - 'A' + 10 : 255 ))
@@ -1894,7 +1889,7 @@ wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
len += 3;
else
{
- if (wordsplit_c_quote_char (*str) != -1)
+ if (wordsplit_c_quote_char (*str))
len += 2;
else
len += 4;
@@ -1903,47 +1898,56 @@ wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
return len;
}
-void
-wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
- const char *escapable)
+int
+wsplt_unquote_char (const char *transtab, int c)
{
- int i;
-
- for (i = 0; i < n;)
+ while (*transtab && transtab[1])
{
- if (src[i] == '\\' && i < n && strchr (escapable, src[i + 1]))
- i++;
- *dst++ = src[i++];
+ if (*transtab++ == c)
+ return *transtab;
+ ++transtab;
}
- *dst = 0;
+ return 0;
}
-void
-wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n)
+int
+wsplt_quote_char (const char *transtab, int c)
{
- int i;
-
- for (i = 0; i < n;)
+ for (; *transtab && transtab[1]; transtab += 2)
{
- if (src[i] == '\\')
- i++;
- *dst++ = src[i++];
+ if (transtab[1] == c)
+ return *transtab;
}
- *dst = 0;
+ return 0;
+}
+
+int
+wordsplit_c_unquote_char (int c)
+{
+ return wsplt_unquote_char (wordsplit_c_escape_tab, c);
+}
+
+int
+wordsplit_c_quote_char (int c)
+{
+ return wsplt_quote_char (wordsplit_c_escape_tab, c);
}
void
-wordsplit_c_unquote_copy (char *dst, const char *src, size_t n)
+wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
+ char *dst, const char *src, size_t n)
{
int i = 0;
int c;
+ inquote = !!inquote;
while (i < n)
{
if (src[i] == '\\')
{
++i;
- if (src[i] == 'x' || src[i] == 'X')
+ if (WRDSO_ESC_TEST (ws, inquote, WRDSO_XESC)
+ && (src[i] == 'x' || src[i] == 'X'))
{
if (n - i < 2)
{
@@ -1966,7 +1970,8 @@ wordsplit_c_unquote_copy (char *dst, const char *src, size_t n)
}
}
}
- else if ((unsigned char) src[i] < 128 && ISDIGIT (src[i]))
+ else if (WRDSO_ESC_TEST (ws, inquote, WRDSO_OESC)
+ && (unsigned char) src[i] < 128 && ISDIGIT (src[i]))
{
if (n - i < 1)
{
@@ -1988,8 +1993,17 @@ wordsplit_c_unquote_copy (char *dst, const char *src, size_t n)
}
}
}
+ else if ((c = wsplt_unquote_char (ws->ws_escape[inquote], src[i])))
+ {
+ *dst++ = c;
+ ++i;
+ }
else
- *dst++ = wordsplit_c_unquote_char (src[i++]);
+ {
+ if (WRDSO_ESC_TEST (ws, inquote, WRDSO_BSKEEP))
+ *dst++ = '\\';
+ *dst++ = src[i++];
+ }
}
else
*dst++ = src[i++];
@@ -2023,7 +2037,7 @@ wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
{
int c = wordsplit_c_quote_char (*src);
*dst++ = '\\';
- if (c != -1)
+ if (c)
*dst++ = c;
else
{
diff --git a/src/wordsplit.h b/src/wordsplit.h
index 3c1d533..5f36b1e 100644
--- a/src/wordsplit.h
+++ b/src/wordsplit.h
@@ -43,7 +43,7 @@ struct wordsplit
Additional options. */
const char *ws_delim; /* [Input] (WRDSF_DELIM) Word delimiters. */
const char *ws_comment; /* [Input] (WRDSF_COMMENT) Comment characters. */
- const char *ws_escape; /* [Input] (WRDSF_ESCAPE) Characters to be escaped
+ const char *ws_escape[2]; /* [Input] (WRDSF_ESCAPE) Characters to be escaped
with backslash. */
void (*ws_alloc_die) (wordsplit_t *wsp);
/* [Input] (WRDSF_ALLOC_DIE) Function called when
@@ -184,13 +184,35 @@ struct wordsplit
WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_CESCAPES)
/* Remove the word that produces empty string after path expansion */
-#define WRDSO_NULLGLOB 0x01
+#define WRDSO_NULLGLOB 0x00000001
/* Print error message if path expansion produces empty string */
-#define WRDSO_FAILGLOB 0x02
+#define WRDSO_FAILGLOB 0x00000002
/* Allow a leading period to be matched by metacharacters. */
-#define WRDSO_DOTGLOB 0x04
+#define WRDSO_DOTGLOB 0x00000004
/* ws_command needs argv parameter */
-#define WRDSO_ARGV 0x08
+#define WRDSO_ARGV 0x00000008
+/* Keep backslash in unrecognized escape sequences in words */
+#define WRDSO_BSKEEP_WORD 0x00000010
+/* Handle octal escapes in words */
+#define WRDSO_OESC_WORD 0x00000020
+/* Handle hex escapes in words */
+#define WRDSO_XESC_WORD 0x00000040
+
+/* Keep backslash in unrecognized escape sequences in quoted strings */
+#define WRDSO_BSKEEP_QUOTE 0x00000100
+/* Handle octal escapes in quoted strings */
+#define WRDSO_OESC_QUOTE 0x00000200
+/* Handle hex escapes in quoted strings */
+#define WRDSO_XESC_QUOTE 0x00000400
+
+#define WRDSO_BSKEEP WRDSO_BSKEEP_WORD
+#define WRDSO_OESC WRDSO_OESC_WORD
+#define WRDSO_XESC WRDSO_XESC_WORD
+
+/* Set escape option F in WS for words (Q==0) or quoted strings (Q==1) */
+#define WRDSO_ESC_SET(ws,q,f) ((ws)->ws_options |= ((f) << 4*(q)))
+/* Test WS for escape option F for words (Q==0) or quoted strings (Q==1) */
+#define WRDSO_ESC_TEST(ws,q,f) ((ws)->ws_options & ((f) << 4*(q)))
#define WRDSE_OK 0
#define WRDSE_EOF WRDSE_OK
@@ -215,10 +237,6 @@ int wordsplit_c_unquote_char (int c);
int wordsplit_c_quote_char (int c);
size_t wordsplit_c_quoted_length (const char *str, int quote_hex,
int *quote);
-void wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
- const char *escapable);
-void wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n);
-void wordsplit_c_unquote_copy (char *dst, const char *src, size_t n);
void wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex);
void wordsplit_perror (wordsplit_t *ws);

Return to:

Send suggestions and report system problems to the System administrator.