Fix quote removal in wordsplit.

* include/smap/wordsplit.h (WRDSF_CESCAPES): New flag. (wordsplit_unquote_char): Rename to wordsplit_c_unquote_char. (wordsplit_quote_char): Rename to wordsplit_c_quote_char. (wordsplit_quoted_length): Rename to wordsplit_c_quoted_length. (wordsplit_quote_copy): Rename to wordsplit_c_quote_copy. (wordsplit_unquote_copy): Rename to wordsplit_c_unquote_copy. (wordsplit_sh_unquote_copy): New prototype. * lib/wordsplit.c (wsnode_quoteremoval): New function. (wsnode_coalesce): Only coalesce adjacent nodes, do not modify them. (wordsplit_len): Introduce quote removal (actually, unescaping) pass.
author: Sergey Poznyakoff <gray@gnu.org.ua> 2010-06-24 13:25:23 +0300
committer: Sergey Poznyakoff <gray@gnu.org.ua> 2010-06-24 13:25:23 +0300
commit: 558fcc76286852c2dc360e32287aff8410de04f0 (patch)
tree: d4dca00a9db849d0d9e231ea95366e3c62bcbcb7
parent: c0e1d8a2cfa7dce23c0f3366de392dda2de4ce77 (diff)
download: smap-558fcc76286852c2dc360e32287aff8410de04f0.tar.gz
smap-558fcc76286852c2dc360e32287aff8410de04f0.tar.bz2
2 files changed, 88 insertions, 36 deletions
diff --git a/include/smap/wordsplit.h b/include/smap/wordsplit.h
index 04c5e65..ced28f3 100644
--- a/include/smap/wordsplit.h
+++ b/include/smap/wordsplit.h
@@ -92,10 +92,12 @@ struct wordsplit
 /* Keep undefined variables in place, instead of expanding them to
    empty string */
 #define WRDSF_KEEPUNDEF         0x200000
+/* Handle C escapes */
+#define WRDSF_CESCAPES          0x400000
 
 #define WRDSF_DEFFLAGS	       \
   (WRDSF_NOVAR | WRDSF_NOCMD | \
-   WRDSF_WS | WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS)
+   WRDSF_WS | WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_CESCAPES)
 
 #define WRDSE_EOF        0
 #define WRDSE_QUOTE      1
@@ -107,11 +109,12 @@ struct wordsplit
 int wordsplit(const char *s, struct wordsplit *p, int flags);
 void wordsplit_free(struct wordsplit *p);
 
-int wordsplit_unquote_char(int c);
-int wordsplit_quote_char(int c);
-size_t wordsplit_quoted_length(const char *str, int quote_hex, int *quote);
-void wordsplit_unquote_copy(char *dst, const char *src, size_t n);
-void wordsplit_quote_copy(char *dst, const char *src, int quote_hex);
+int wordsplit_c_unquote_char(int c);
+int wordsplit_c_quote_char(int c);
+size_t wordsplit_c_quoted_length(const char *str, int quote_hex, int *quote);
+void wordsplit_sh_unquote_copy(char *dst, const char *src, size_t n);
+void wordsplit_c_unquote_copy(char *dst, const char *src, size_t n);
+void wordsplit_c_quote_copy(char *dst, const char *src, int quote_hex);
 
 void wordsplit_perror(struct wordsplit *ws);
 const char *wordsplit_strerror(struct wordsplit *ws);
diff --git a/lib/wordsplit.c b/lib/wordsplit.c
index 14d2270..f46db1c 100644
--- a/lib/wordsplit.c
+++ b/lib/wordsplit.c
@@ -413,6 +413,39 @@ coalesce_segment(struct wordsplit *wsp, struct wordsplit_node *node)
 }
 
 static int
+wsnode_quoteremoval(struct wordsplit *wsp)
+{
+	struct wordsplit_node *p;
+	void (*uqfn)(char *, const char *, size_t) =
+		(wsp->ws_flags & WRDSF_CESCAPES) ?
+		  wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy;
+
+	for (p = wsp->ws_head; p; p = p->next) {
+		const char *str = wsnode_ptr(wsp, p);
+		size_t slen = wsnode_len(p);
+		int unquote;
+
+		if (wsp->ws_flags & WRDSF_QUOTE) {
+			unquote = !(p->flags & _WSNF_NOEXPAND);
+		} else
+			unquote = 0;
+
+		if (unquote) {
+			if (!(p->flags & _WSNF_WORD)) {
+				char *newstr = malloc(slen + 1);
+				if (!newstr)
+					return _wsplt_nomem(wsp);
+				memcpy(newstr, str, slen);
+				newstr[slen] = 0;
+				p->v.word = newstr;
+				p->flags |= _WSNF_WORD;
+			}
+			uqfn(p->v.word, str, slen);
+		}
+	}
+}
+
+static int
 wsnode_coalesce(struct wordsplit *wsp)
 {
 	struct wordsplit_node *p;
@@ -442,33 +475,24 @@ wordsplit_finish(struct wordsplit *wsp)
 	for (p = wsp->ws_head; p; p = p->next) {
 		const char *str = wsnode_ptr(wsp, p);
 		size_t slen = wsnode_len(p);
-		int unquote;
-		char *newstr;
+		char *newstr = malloc(slen + 1);
 
-		if (wsp->ws_flags & WRDSF_QUOTE) {
-			unquote = !(p->flags & _WSNF_NOEXPAND);
-		} else
-			unquote = 0;
-
-		if (p->flags & _WSNF_WORD) {
-			newstr = p->v.word;
-			p->v.word = NULL;
-		} else {
-			newstr = malloc(slen + 1);
-			if (!newstr)
-				return _wsplt_nomem(wsp);
-			memcpy(newstr, str, slen);
-			newstr[slen] = 0;
-		}
-		
-		if (unquote)
-			wordsplit_unquote_copy(newstr, str, slen);
+		/* Assign newstr first, even if it is NULL.  This way
+		   wordsplit_free will work even if we return
+		   nomem later. */
 		wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = newstr;
+		if (!newstr)
+			return _wsplt_nomem(wsp);
+		memcpy(newstr, str, slen);
+		newstr[slen] = 0;
+		
 		wsp->ws_wordc++;
+		
 	}
 	wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
 	return 0;
 }
+
 
 /* Variable expansion */
 static int
@@ -916,7 +940,7 @@ scan_word(struct wordsplit *wsp, size_t start)
 static char quote_transtab[] = "\\\\a\ab\bf\fn\nr\rt\tv\v";
 
 int
-wordsplit_unquote_char(int c)
+wordsplit_c_unquote_char(int c)
 {
 	char *p;
 
@@ -928,7 +952,7 @@ wordsplit_unquote_char(int c)
 }
 
 int
-wordsplit_quote_char(int c)
+wordsplit_c_quote_char(int c)
 {
 	char *p;
 
@@ -959,7 +983,7 @@ xtonum(int *pval, const char *src, int base, int cnt)
 }
 
 size_t
-wordsplit_quoted_length(const char *str, int quote_hex, int *quote)
+wordsplit_c_quoted_length(const char *str, int quote_hex, int *quote)
 {
 	size_t len = 0;
 
@@ -976,7 +1000,7 @@ wordsplit_quoted_length(const char *str, int quote_hex, int *quote)
 		else if (quote_hex)
 			len += 3;
 		else {
-			if (wordsplit_quote_char(*str) != -1)
+			if (wordsplit_c_quote_char(*str) != -1)
 				len += 2;
 			else
 				len += 4;
@@ -986,7 +1010,21 @@ wordsplit_quoted_length(const char *str, int quote_hex, int *quote)
 }
 
 void
-wordsplit_unquote_copy(char *dst, const char *src, size_t n)
+wordsplit_sh_unquote_copy(char *dst, const char *src, size_t n)
+{
+	int i;
+	int c;
+
+	for (i = 0; i < n;) {
+		if (src[i] == '\\')
+			i++;
+		*dst++ = src[i++];
+	}
+	*dst = 0;
+}
+
+void
+wordsplit_c_unquote_copy(char *dst, const char *src, size_t n)
 {
 	int i = 0;
 	int c;
@@ -1026,7 +1064,7 @@ wordsplit_unquote_copy(char *dst, const char *src, size_t n)
 					}
 				}
 			} else
-				*dst++ = wordsplit_unquote_char(src[i++]);
+				*dst++ = wordsplit_c_unquote_char(src[i++]);
 		} else
 			*dst++ = src[i++];
 	}
@@ -1034,7 +1072,7 @@ wordsplit_unquote_copy(char *dst, const char *src, size_t n)
 }
 
 void
-wordsplit_quote_copy(char *dst, const char *src, int quote_hex)
+wordsplit_c_quote_copy(char *dst, const char *src, int quote_hex)
 {
 	for (; *src; src++) {
 		if (*src == '"') {
@@ -1051,7 +1089,7 @@ wordsplit_quote_copy(char *dst, const char *src, int quote_hex)
 				memcpy(dst, tmp, 3);
 				dst += 3;
 			} else {
-				int c = wordsplit_quote_char(*src);
+				int c = wordsplit_c_quote_char(*src);
 				*dst++ = '\\';
 				if (c != -1)
 					*dst++ = c;
@@ -1109,14 +1147,25 @@ wordsplit_len(const char *command, size_t len, struct wordsplit *wsp,
 			wordsplit_dump_nodes(wsp);
 		}
 	}
+
+	do {
+		if (wsnode_quoteremoval(wsp))
+			break;
+		if (wsp->ws_flags & WRDSF_DEBUG) {
+			wsp->ws_error("After quote removal:");
+			wordsplit_dump_nodes(wsp);
+		}
 	
-	if (wsnode_coalesce(wsp) == 0) {
+		if (wsnode_coalesce(wsp))
+			break;
+		
 		if (wsp->ws_flags & WRDSF_DEBUG) {
 			wsp->ws_error("Coalesced list:");
 			wordsplit_dump_nodes(wsp);
 		}
+
 		wordsplit_finish(wsp);
-	}
+	} while (0);
 	wordsplit_free_nodes(wsp);
 	return wsp->ws_errno;
 }
author	Sergey Poznyakoff <gray@gnu.org.ua>	2010-06-24 13:25:23 +0300
committer	Sergey Poznyakoff <gray@gnu.org.ua>	2010-06-24 13:25:23 +0300
commit	558fcc76286852c2dc360e32287aff8410de04f0 (patch)
tree	d4dca00a9db849d0d9e231ea95366e3c62bcbcb7
parent	c0e1d8a2cfa7dce23c0f3366de392dda2de4ce77 (diff)
download	smap-558fcc76286852c2dc360e32287aff8410de04f0.tar.gz smap-558fcc76286852c2dc360e32287aff8410de04f0.tar.bz2