summaryrefslogtreecommitdiffabout
authorSergey Poznyakoff <gray@gnu.org.ua>2014-02-07 17:40:07 (GMT)
committer Sergey Poznyakoff <gray@gnu.org.ua>2014-02-07 17:40:07 (GMT)
commit4be79061e8f68f6e3174a05452d96f31e8062464 (patch) (unidiff)
treec93924dc62e3f71f76cf29f98cb1c8d164e228a8
parentc9b5abe560c2fe06368cb733df8bcbfdb33a8526 (diff)
downloadcflow-4be79061e8f68f6e3174a05452d96f31e8062464.tar.gz
cflow-4be79061e8f68f6e3174a05452d96f31e8062464.tar.bz2
Use wordsplit.[ch] (from grecs) instead of the obsolete argcv.[ch]
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r--src/Makefile.am6
-rw-r--r--src/argcv.c410
-rw-r--r--src/argcv.h52
-rw-r--r--src/rc.c42
-rw-r--r--src/wordsplit.c1624
-rw-r--r--src/wordsplit.h159
6 files changed, 1813 insertions, 480 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index bad0476..e7f0b85 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -18,8 +18,6 @@
18 18
19bin_PROGRAMS = cflow 19bin_PROGRAMS = cflow
20cflow_SOURCES = \ 20cflow_SOURCES = \
21 argcv.c\
22 argcv.h\
23 c.l\ 21 c.l\
24 cflow.h\ 22 cflow.h\
25 depmap.c\ 23 depmap.c\
@@ -31,7 +29,9 @@ cflow_SOURCES = \
31 parser.h\ 29 parser.h\
32 posix.c\ 30 posix.c\
33 rc.c\ 31 rc.c\
34 symbol.c 32 symbol.c\
33 wordsplit.c\
34 wordsplit.h
35 35
36localedir = $(datadir)/locale 36localedir = $(datadir)/locale
37 37
diff --git a/src/argcv.c b/src/argcv.c
deleted file mode 100644
index 611cb64..0000000
--- a/src/argcv.c
+++ b/dev/null
@@ -1,410 +0,0 @@
1/* argcv.c - simple functions for parsing input based on whitespace
2 Copyright (C) 1999, 2000, 2001, 2005, 2007 Free Software Foundation, Inc.
3
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 3 of the License, or (at your option) any later version.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
16*/
17
18#ifdef HAVE_CONFIG_H
19# include <config.h>
20#endif
21
22#include <ctype.h>
23#include <errno.h>
24#include <argcv.h>
25
26/*
27 * takes a string and splits it into several strings, breaking at ' '
28 * command is the string to split
29 * the number of strings is placed into argc
30 * the split strings are put into argv
31 * returns 0 on success, nonzero on failure
32 */
33
34#define isws(c) ((c)==' '||(c)=='\t'||(c)=='\n')
35#define isdelim(c,delim) (strchr(delim,(c))!=NULL)
36
37static int
38argcv_scan (int len, const char *command, const char *delim, const char* cmnt,
39 int *start, int *end, int *save)
40{
41 int i = 0;
42
43 for (;;)
44 {
45 i = *save;
46
47 if (i >= len)
48 return i + 1;
49
50 /* Skip initial whitespace */
51 while (i < len && isws (command[i]))
52 i++;
53 *start = i;
54
55 if (!isdelim (command[i], delim))
56 {
57 while (i < len)
58 {
59 if (command[i] == '\\')
60 {
61 if (++i == len)
62 break;
63 i++;
64 continue;
65 }
66
67 if (command[i] == '\'' || command[i] == '"')
68 {
69 int j;
70 for (j = i+1; j < len && command[j] != command[i]; j++)
71 if (command[j] == '\\')
72 j++;
73 if (j < len)
74 i = j+1;
75 else
76 i++;
77 }
78 else if (isws (command[i]) || isdelim (command[i], delim))
79 break;
80 else
81 i++; /* skip the escaped character */
82 }
83 i--;
84 }
85
86 *end = i;
87 *save = i + 1;
88
89 /* If we have a token, and it starts with a comment character, skip
90 to the newline and restart the token search. */
91 if (*save <= len)
92 {
93 if (cmnt && strchr (cmnt, command[*start]) != NULL)
94 {
95 i = *save;
96 while (i < len && command[i] != '\n')
97 i++;
98
99 *save = i;
100 continue;
101 }
102 }
103 break;
104 }
105 return *save;
106}
107
108static char quote_transtab[] = "\\\\a\ab\bf\fn\nr\rt\t";
109
110int
111argcv_unquote_char (int c)
112{
113 char *p;
114
115 for (p = quote_transtab; *p; p += 2)
116 {
117 if (*p == c)
118 return p[1];
119 }
120 return c;
121}
122
123int
124argcv_quote_char (int c)
125{
126 char *p;
127
128 for (p = quote_transtab + sizeof(quote_transtab) - 2;
129 p > quote_transtab; p -= 2)
130 {
131 if (*p == c)
132 return p[-1];
133 }
134 return -1;
135}
136
137#define to_num(c) \
138 (isdigit(c) ? c - '0' : (isxdigit(c) ? toupper(c) - 'A' + 10 : 255 ))
139
140static int
141xtonum (int *pval, const char *src, int base, int cnt)
142{
143 int i, val;
144
145 for (i = 0, val = 0; i < cnt; i++, src++)
146 {
147 int n = *(unsigned char*)src;
148 if (n > 127 || (n = to_num(n)) >= base)
149 break;
150 val = val*base + n;
151 }
152 *pval = val;
153 return i;
154}
155
156size_t
157argcv_quoted_length (const char *str, int *quote)
158{
159 size_t len = 0;
160
161 *quote = 0;
162 for (; *str; str++)
163 {
164 if (*str == ' ')
165 {
166 len++;
167 *quote = 1;
168 }
169 else if (*str == '"' || *str == '\'')
170 {
171 len += 2;
172 *quote = 1;
173 }
174 else if (*str != '\t' && *str != '\\' && isprint (*str))
175 len++;
176 else if (argcv_quote_char (*str) != -1)
177 len += 2;
178 else
179 len += 4;
180 }
181 return len;
182}
183
184void
185argcv_unquote_copy (char *dst, const char *src, size_t n)
186{
187 int i = 0;
188 int c;
189 int expect_delim = 0;
190
191 while (i < n)
192 {
193 switch (src[i])
194 {
195 case '\'':
196 case '"':
197 if (!expect_delim)
198 {
199 const char *p;
200
201 for (p = src+i+1; *p && *p != src[i]; p++)
202 if (*p == '\\')
203 p++;
204 if (*p)
205 expect_delim = src[i++];
206 else
207 *dst++ = src[i++];
208 }
209 else if (expect_delim == src[i])
210 ++i;
211 else
212 *dst++ = src[i++];
213 break;
214
215 case '\\':
216 ++i;
217 if (src[i] == 'x' || src[i] == 'X')
218 {
219 if (n - i < 2)
220 {
221 *dst++ = '\\';
222 *dst++ = src[i++];
223 }
224 else
225 {
226 int off = xtonum(&c, src + i + 1, 16, 2);
227 if (off == 0)
228 {
229 *dst++ = '\\';
230 *dst++ = src[i++];
231 }
232 else
233 {
234 *dst++ = c;
235 i += off + 1;
236 }
237 }
238 }
239 else if ((unsigned char)src[i] < 128 && isdigit(src[i]))
240 {
241 if (n - i < 1)
242 {
243 *dst++ = '\\';
244 *dst++ = src[i++];
245 }
246 else
247 {
248 int off = xtonum(&c, src+i, 8, 3);
249 if (off == 0)
250 {
251 *dst++ = '\\';
252 *dst++ = src[i++];
253 }
254 else
255 {
256 *dst++ = c;
257 i += off;
258 }
259 }
260 }
261 else
262 *dst++ = argcv_unquote_char (src[i++]);
263 break;
264
265 default:
266 *dst++ = src[i++];
267 }
268 }
269 *dst = 0;
270}
271
272void
273argcv_quote_copy (char *dst, const char *src)
274{
275 for (; *src; src++)
276 {
277 if (*src == '"' || *src == '\'')
278 {
279 *dst++ = '\\';
280 *dst++ = *src;
281 }
282 else if (*src != '\t' && *src != '\\' && isprint(*src))
283 *dst++ = *src;
284 else
285 {
286 int c = argcv_quote_char (*src);
287 *dst++ = '\\';
288 if (c != -1)
289 *dst++ = c;
290 else
291 {
292 char tmp[4];
293 snprintf (tmp, sizeof tmp, "%03o", *(unsigned char*)src);
294 memcpy (dst, tmp, 3);
295 dst += 3;
296 }
297 }
298 }
299}
300
301int
302argcv_get (const char *command, const char *delim, const char *cmnt,
303 int *argc, char ***argv)
304{
305 int len = strlen (command);
306 int i = 0;
307 int start, end, save;
308
309 *argv = NULL;
310
311 /* Count number of arguments */
312 *argc = 0;
313 save = 0;
314
315 while (argcv_scan (len, command, delim, cmnt, &start, &end, &save) <= len)
316 (*argc)++;
317
318 *argv = calloc ((*argc + 1), sizeof (char *));
319 if (*argv == NULL)
320 return ENOMEM;
321
322 i = 0;
323 save = 0;
324 for (i = 0; i < *argc; i++)
325 {
326 int n;
327 argcv_scan (len, command, delim, cmnt, &start, &end, &save);
328
329 if ((command[start] == '"' || command[end] == '\'')
330 && command[end] == command[start])
331 {
332 start++;
333 end--;
334 }
335 n = end - start + 1;
336 (*argv)[i] = calloc (n+1, sizeof (char));
337 if ((*argv)[i] == NULL)
338 return ENOMEM;
339 argcv_unquote_copy ((*argv)[i], &command[start], n);
340 (*argv)[i][n] = 0;
341 }
342 (*argv)[i] = NULL;
343 return 0;
344}
345
346/*
347 * frees all elements of an argv array
348 * argc is the number of elements
349 * argv is the array
350 */
351int
352argcv_free (int argc, char **argv)
353{
354 while (--argc >= 0)
355 if (argv[argc])
356 free (argv[argc]);
357 free (argv);
358 return 0;
359}
360
361/* Take a argv an make string separated by ' '. */
362
363int
364argcv_string (int argc, char **argv, char **pstring)
365{
366 size_t i, j, len;
367 char *buffer;
368
369 /* No need. */
370 if (pstring == NULL)
371 return EINVAL;
372
373 buffer = malloc (1);
374 if (buffer == NULL)
375 return ENOMEM;
376 *buffer = '\0';
377
378 for (len = i = j = 0; i < argc; i++)
379 {
380 int quote;
381 int toklen;
382
383 toklen = argcv_quoted_length (argv[i], &quote);
384
385 len += toklen + 2;
386 if (quote)
387 len += 2;
388
389 buffer = realloc (buffer, len);
390 if (buffer == NULL)
391 return ENOMEM;
392
393 if (i != 0)
394 buffer[j++] = ' ';
395 if (quote)
396 buffer[j++] = '"';
397 argcv_quote_copy (buffer + j, argv[i]);
398 j += toklen;
399 if (quote)
400 buffer[j++] = '"';
401 }
402
403 for (; j > 0 && isspace (buffer[j-1]); j--)
404 ;
405 buffer[j] = 0;
406 if (pstring)
407 *pstring = buffer;
408 return 0;
409}
410
diff --git a/src/argcv.h b/src/argcv.h
deleted file mode 100644
index c082709..0000000
--- a/src/argcv.h
+++ b/dev/null
@@ -1,52 +0,0 @@
1/* GNU Mailutils -- a suite of utilities for electronic mail
2 Copyright (C) 1999, 2000, 2001, 2005, 2007 Free Software Foundation, Inc.
3
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 3 of the License, or (at your option) any later version.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17#ifndef _ARGCV_H
18#define _ARGCV_H 1
19
20#include <stdio.h>
21#include <stdlib.h>
22#include <unistd.h>
23#include <string.h>
24
25#ifdef __cplusplus
26extern "C" {
27#endif
28
29#ifndef __P
30# if defined PROTOTYPES || (defined __STDC__ && __STDC__)
31# define __P(args) args
32# else
33# define __P(args) ()
34# endif
35#endif /*__P */
36
37extern int argcv_get __P ((const char *command, const char *delim,
38 const char* cmnt,
39 int *argc, char ***argv));
40extern int argcv_string __P ((int argc, char **argv, char **string));
41extern int argcv_free __P ((int argc, char **argv));
42extern int argcv_unquote_char __P((int c));
43extern int argcv_quote_char __P((int c));
44extern size_t argcv_quoted_length __P((const char *str, int *quote));
45extern void argcv_unquote_copy __P((char *dst, const char *src, size_t n));
46extern void argcv_quote_copy __P((char *dst, const char *src));
47
48#ifdef __cplusplus
49}
50#endif
51
52#endif /* _ARGCV_H */
diff --git a/src/rc.c b/src/rc.c
index bb9d25a..7586975 100644
--- a/src/rc.c
+++ b/src/rc.c
@@ -18,7 +18,7 @@
18#include <parser.h> 18#include <parser.h>
19#include <sys/stat.h> 19#include <sys/stat.h>
20#include <ctype.h> 20#include <ctype.h>
21#include <argcv.h> 21#include <wordsplit.h>
22 22
23#ifndef LOCAL_RC 23#ifndef LOCAL_RC
24# define LOCAL_RC ".cflowrc" 24# define LOCAL_RC ".cflowrc"
@@ -31,8 +31,9 @@ expand_argcv(int *argc_ptr, char ***argv_ptr, int argc, char **argv)
31 31
32 *argv_ptr = xrealloc(*argv_ptr, 32 *argv_ptr = xrealloc(*argv_ptr,
33 (*argc_ptr + argc + 1) * sizeof **argv_ptr); 33 (*argc_ptr + argc + 1) * sizeof **argv_ptr);
34 for (i = 0; i <= argc; i++) 34 for (i = 0; i < argc; i++)
35 (*argv_ptr)[*argc_ptr + i] = argv[i]; 35 (*argv_ptr)[*argc_ptr + i] = xstrdup(argv[i]);
36 (*argv_ptr)[*argc_ptr + i] = NULL;
36 *argc_ptr += argc; 37 *argc_ptr += argc;
37} 38}
38 39
@@ -45,6 +46,9 @@ parse_rc(int *argc_ptr, char ***argv_ptr, char *name)
45 FILE *rcfile; 46 FILE *rcfile;
46 int size; 47 int size;
47 char *buf, *p; 48 char *buf, *p;
49 struct wordsplit ws;
50 int wsflags;
51 int line;
48 52
49 if (stat(name, &st)) 53 if (stat(name, &st))
50 return; 54 return;
@@ -62,14 +66,19 @@ parse_rc(int *argc_ptr, char ***argv_ptr, char *name)
62 buf[size] = 0; 66 buf[size] = 0;
63 fclose(rcfile); 67 fclose(rcfile);
64 68
69 ws.ws_comment = "#";
70 wsflags = WRDSF_DEFFLAGS | WRDSF_COMMENT;
71 line = 0;
65 for (p = strtok(buf, "\n"); p; p = strtok(NULL, "\n")) { 72 for (p = strtok(buf, "\n"); p; p = strtok(NULL, "\n")) {
66 int argc; 73 ++line;
67 char **argv; 74 if (wordsplit(p, &ws, wsflags))
68 75 error(1, 0, "%s:%d: %s", name, line, wordsplit_strerror(&ws));
69 argcv_get(p, "", "#", &argc, &argv); 76 wsflags |= WRDSF_REUSE;
70 expand_argcv(argc_ptr, argv_ptr, argc, argv); 77 if (ws.ws_wordc)
71 free(argv); 78 expand_argcv(argc_ptr, argv_ptr, ws.ws_wordc, ws.ws_wordv);
72 } 79 }
80 if (wsflags & WRDSF_REUSE)
81 wordsplit_free(&ws);
73 free(buf); 82 free(buf);
74} 83}
75 84
@@ -94,12 +103,15 @@ sourcerc(int *argc_ptr, char ***argv_ptr)
94 103
95 env = getenv("CFLOW_OPTIONS"); 104 env = getenv("CFLOW_OPTIONS");
96 if (env) { 105 if (env) {
97 int argc; 106 struct wordsplit ws;
98 char **argv; 107
99 108 ws.ws_comment = "#";
100 argcv_get(env, "", "#", &argc, &argv); 109 if (wordsplit(env, &ws, WRDSF_DEFFLAGS | WRDSF_COMMENT))
101 expand_argcv(&xargc, &xargv, argc, argv); 110 error(1, 0, "failed to parse CFLOW_OPTIONS: %s",
102 free(argv); 111 wordsplit_strerror(&ws));
112 if (ws.ws_wordc)
113 expand_argcv(&xargc, &xargv, ws.ws_wordc, ws.ws_wordv);
114 wordsplit_free(&ws);
103 } 115 }
104 116
105 env = getenv("CFLOWRC"); 117 env = getenv("CFLOWRC");
diff --git a/src/wordsplit.c b/src/wordsplit.c
new file mode 100644
index 0000000..9047369
--- a/dev/null
+++ b/src/wordsplit.c
@@ -0,0 +1,1624 @@
1/* wordsplit - a word splitter
2 Copyright (C) 2009-2012 Sergey Poznyakoff
3
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 3 of the License, or (at your
7 option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along
15 with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17#ifdef HAVE_CONFIG_H
18# include <config.h>
19#endif
20
21#include <errno.h>
22#include <ctype.h>
23#include <unistd.h>
24#include <stdlib.h>
25#include <string.h>
26#include <stdio.h>
27#include <stdarg.h>
28
29#if ENABLE_NLS
30# include <gettext.h>
31#else
32# define gettext(msgid) msgid
33#endif
34#define _(msgid) gettext (msgid)
35#define N_(msgid) msgid
36
37#include <wordsplit.h>
38
39#define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n')
40#define ISDELIM(ws,c) \
41 (strchr ((ws)->ws_delim, (c)) != NULL)
42#define ISPUNCT(c) (strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL)
43#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z')
44#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z')
45#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c))
46#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9')
47#define ISXDIGIT(c) (strchr("abcdefABCDEF", c)!=NULL)
48#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c))
49#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127)
50
51#define ALLOC_INIT 128
52#define ALLOC_INCR 128
53
54static void
55_wsplt_alloc_die (struct wordsplit *wsp)
56{
57 wsp->ws_error (_("memory exhausted"));
58 abort ();
59}
60
61static void
62_wsplt_error (const char *fmt, ...)
63{
64 va_list ap;
65
66 va_start (ap, fmt);
67 vfprintf (stderr, fmt, ap);
68 va_end (ap);
69 fputc ('\n', stderr);
70}
71
72static void wordsplit_free_nodes (struct wordsplit *);
73
74static int
75_wsplt_nomem (struct wordsplit *wsp)
76{
77 errno = ENOMEM;
78 wsp->ws_errno = WRDSE_NOSPACE;
79 if (wsp->ws_flags & WRDSF_ENOMEMABRT)
80 wsp->ws_alloc_die (wsp);
81 if (wsp->ws_flags & WRDSF_SHOWERR)
82 wordsplit_perror (wsp);
83 if (!(wsp->ws_flags & WRDSF_REUSE))
84 wordsplit_free (wsp);
85 wordsplit_free_nodes (wsp);
86 return wsp->ws_errno;
87}
88
89static void
90wordsplit_init0 (struct wordsplit *wsp)
91{
92 if (wsp->ws_flags & WRDSF_REUSE)
93 {
94 if (!(wsp->ws_flags & WRDSF_APPEND))
95 wordsplit_free_words (wsp);
96 }
97 else
98 {
99 wsp->ws_wordv = NULL;
100 wsp->ws_wordc = 0;
101 wsp->ws_wordn = 0;
102 }
103
104 wsp->ws_errno = 0;
105 wsp->ws_head = wsp->ws_tail = NULL;
106}
107
108static int
109wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
110 int flags)
111{
112 wsp->ws_flags = flags;
113
114 if (!(wsp->ws_flags & WRDSF_ALLOC_DIE))
115 wsp->ws_alloc_die = _wsplt_alloc_die;
116 if (!(wsp->ws_flags & WRDSF_ERROR))
117 wsp->ws_error = _wsplt_error;
118
119 if (!(wsp->ws_flags & WRDSF_NOVAR)
120 && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR)))
121 {
122 errno = EINVAL;
123 wsp->ws_errno = WRDSE_USAGE;
124 if (wsp->ws_flags & WRDSF_SHOWERR)
125 wordsplit_perror (wsp);
126 return wsp->ws_errno;
127 }
128
129 if (!(wsp->ws_flags & WRDSF_NOCMD))
130 {
131 errno = EINVAL;
132 wsp->ws_errno = WRDSE_NOSUPP;
133 if (wsp->ws_flags & WRDSF_SHOWERR)
134 wordsplit_perror (wsp);
135 return wsp->ws_errno;
136 }
137
138 if (wsp->ws_flags & WRDSF_SHOWDBG)
139 {
140 if (!(wsp->ws_flags & WRDSF_DEBUG))
141 {
142 if (wsp->ws_flags & WRDSF_ERROR)
143 wsp->ws_debug = wsp->ws_error;
144 else if (wsp->ws_flags & WRDSF_SHOWERR)
145 wsp->ws_debug = _wsplt_error;
146 else
147 wsp->ws_flags &= ~WRDSF_SHOWDBG;
148 }
149 }
150
151 wsp->ws_input = input;
152 wsp->ws_len = len;
153
154 if (!(wsp->ws_flags & WRDSF_DOOFFS))
155 wsp->ws_offs = 0;
156
157 if (!(wsp->ws_flags & WRDSF_DELIM))
158 wsp->ws_delim = " \t\n";
159
160 if (!(wsp->ws_flags & WRDSF_COMMENT))
161 wsp->ws_comment = NULL;
162
163 if (!(wsp->ws_flags & WRDSF_CLOSURE))
164 wsp->ws_closure = NULL;
165
166 wsp->ws_endp = 0;
167
168 wordsplit_init0 (wsp);
169
170 return 0;
171}
172
173static int
174alloc_space (struct wordsplit *wsp, size_t count)
175{
176 size_t offs = (wsp->ws_flags & WRDSF_DOOFFS) ? wsp->ws_offs : 0;
177 char **ptr;
178 size_t newalloc;
179
180 if (wsp->ws_wordv == NULL)
181 {
182 newalloc = offs + count > ALLOC_INIT ? count : ALLOC_INIT;
183 ptr = calloc (newalloc, sizeof (ptr[0]));
184 }
185 else if (wsp->ws_wordn < offs + wsp->ws_wordc + count)
186 {
187 newalloc = offs + wsp->ws_wordc +
188 (count > ALLOC_INCR ? count : ALLOC_INCR);
189 ptr = realloc (wsp->ws_wordv, newalloc * sizeof (ptr[0]));
190 }
191 else
192 return 0;
193
194 if (ptr)
195 {
196 wsp->ws_wordn = newalloc;
197 wsp->ws_wordv = ptr;
198 }
199 else
200 return _wsplt_nomem (wsp);
201 return 0;
202}
203
204
205/* Node state flags */
206 #define _WSNF_NULL 0x01/* null node (a noop) */
207 #define _WSNF_WORD 0x02/* node contains word in v.word */
208 #define _WSNF_QUOTE 0x04/* text is quoted */
209 #define _WSNF_NOEXPAND 0x08/* text is not subject to expansion */
210 #define _WSNF_JOIN 0x10/* node must be joined with the next node */
211 #define _WSNF_SEXP 0x20/* is a sed expression */
212
213 #define _WSNF_EMPTYOK 0x0100/* special flag indicating that
214 wordsplit_add_segm must add the
215 segment even if it is empty */
216
217struct wordsplit_node
218{
219 struct wordsplit_node *prev;/* Previous element */
220 struct wordsplit_node *next;/* Next element */
221 int flags; /* Node flags */
222 union
223 {
224 struct
225 {
226 size_t beg; /* Start of word in ws_input */
227 size_t end; /* End of word in ws_input */
228 } segm;
229 char *word;
230 } v;
231};
232
233static const char *
234wsnode_flagstr (int flags)
235{
236 static char retbuf[6];
237 char *p = retbuf;
238
239 if (flags & _WSNF_WORD)
240 *p++ = 'w';
241 else if (flags & _WSNF_NULL)
242 *p++ = 'n';
243 else
244 *p++ = '-';
245 if (flags & _WSNF_QUOTE)
246 *p++ = 'q';
247 else
248 *p++ = '-';
249 if (flags & _WSNF_NOEXPAND)
250 *p++ = 'E';
251 else
252 *p++ = '-';
253 if (flags & _WSNF_JOIN)
254 *p++ = 'j';
255 else
256 *p++ = '-';
257 if (flags & _WSNF_SEXP)
258 *p++ = 's';
259 else
260 *p++ = '-';
261 *p = 0;
262 return retbuf;
263}
264
265static const char *
266wsnode_ptr (struct wordsplit *wsp, struct wordsplit_node *p)
267{
268 if (p->flags & _WSNF_NULL)
269 return "";
270 else if (p->flags & _WSNF_WORD)
271 return p->v.word;
272 else
273 return wsp->ws_input + p->v.segm.beg;
274}
275
276static size_t
277wsnode_len (struct wordsplit_node *p)
278{
279 if (p->flags & _WSNF_NULL)
280 return 0;
281 else if (p->flags & _WSNF_WORD)
282 return strlen (p->v.word);
283 else
284 return p->v.segm.end - p->v.segm.beg;
285}
286
287static int
288wsnode_new (struct wordsplit *wsp, struct wordsplit_node **pnode)
289{
290 struct wordsplit_node *node = calloc (1, sizeof (*node));
291 if (!node)
292 return _wsplt_nomem (wsp);
293 *pnode = node;
294 return 0;
295}
296
297static void
298wsnode_free (struct wordsplit_node *p)
299{
300 if (p->flags & _WSNF_WORD)
301 free (p->v.word);
302 free (p);
303}
304
305static void
306wsnode_append (struct wordsplit *wsp, struct wordsplit_node *node)
307{
308 node->next = NULL;
309 node->prev = wsp->ws_tail;
310 if (wsp->ws_tail)
311 wsp->ws_tail->next = node;
312 else
313 wsp->ws_head = node;
314 wsp->ws_tail = node;
315}
316
317static void
318wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node)
319{
320 struct wordsplit_node *p;
321
322 p = node->prev;
323 if (p)
324 {
325 p->next = node->next;
326 if (!node->next)
327 p->flags &= ~_WSNF_JOIN;
328 }
329 else
330 wsp->ws_head = node->next;
331
332 p = node->next;
333 if (p)
334 p->prev = node->prev;
335 else
336 wsp->ws_tail = node->prev;
337
338 node->next = node->prev = NULL;
339}
340
341static void
342wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node,
343 struct wordsplit_node *anchor, int before)
344{
345 if (!wsp->ws_head)
346 {
347 node->next = node->prev = NULL;
348 wsp->ws_head = wsp->ws_tail = node;
349 }
350 else if (before)
351 {
352 if (anchor->prev)
353 wsnode_insert (wsp, node, anchor->prev, 0);
354 else
355 {
356 node->prev = NULL;
357 node->next = anchor;
358 anchor->prev = node;
359 wsp->ws_head = node;
360 }
361 }
362 else
363 {
364 struct wordsplit_node *p;
365
366 p = anchor->next;
367 if (p)
368 p->prev = node;
369 else
370 wsp->ws_tail = node;
371 node->next = p;
372 node->prev = anchor;
373 anchor->next = node;
374 }
375}
376
377static int
378wordsplit_add_segm (struct wordsplit *wsp, size_t beg, size_t end, int flg)
379{
380 struct wordsplit_node *node;
381 int rc;
382
383 if (end == beg && !(flg & _WSNF_EMPTYOK))
384 return 0;
385 rc = wsnode_new (wsp, &node);
386 if (rc)
387 return rc;
388 node->flags = flg & ~(_WSNF_WORD | _WSNF_EMPTYOK);
389 node->v.segm.beg = beg;
390 node->v.segm.end = end;
391 wsnode_append (wsp, node);
392 return 0;
393}
394
395static void
396wordsplit_free_nodes (struct wordsplit *wsp)
397{
398 struct wordsplit_node *p;
399
400 for (p = wsp->ws_head; p;)
401 {
402 struct wordsplit_node *next = p->next;
403 wsnode_free (p);
404 p = next;
405 }
406 wsp->ws_head = wsp->ws_tail = NULL;
407}
408
409static void
410wordsplit_dump_nodes (struct wordsplit *wsp)
411{
412 struct wordsplit_node *p;
413 int n = 0;
414
415 for (p = wsp->ws_head, n = 0; p; p = p->next, n++)
416 {
417 if (p->flags & _WSNF_WORD)
418 wsp->ws_debug ("%4d: %p: %#04x (%s):%s;",
419 n, p, p->flags, wsnode_flagstr (p->flags), p->v.word);
420 else
421 wsp->ws_debug ("%4d: %p: %#04x (%s):%.*s;",
422 n, p, p->flags, wsnode_flagstr (p->flags),
423 (int) (p->v.segm.end - p->v.segm.beg),
424 wsp->ws_input + p->v.segm.beg);
425 }
426}
427
428static int
429coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
430{
431 struct wordsplit_node *p, *end;
432 size_t len = 0;
433 char *buf, *cur;
434 int stop;
435
436 for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next)
437 {
438 len += wsnode_len (p);
439 }
440 if (p)
441 len += wsnode_len (p);
442 end = p;
443
444 buf = malloc (len + 1);
445 if (!buf)
446 return _wsplt_nomem (wsp);
447 cur = buf;
448
449 p = node;
450 for (stop = 0; !stop;)
451 {
452 struct wordsplit_node *next = p->next;
453 const char *str = wsnode_ptr (wsp, p);
454 size_t slen = wsnode_len (p);
455
456 memcpy (cur, str, slen);
457 cur += slen;
458 if (p != node)
459 {
460 wsnode_remove (wsp, p);
461 stop = p == end;
462 wsnode_free (p);
463 }
464 p = next;
465 }
466
467 *cur = 0;
468
469 node->flags &= ~_WSNF_JOIN;
470
471 if (node->flags & _WSNF_WORD)
472 free (node->v.word);
473 else
474 node->flags |= _WSNF_WORD;
475 node->v.word = buf;
476 return 0;
477}
478
479static int
480wsnode_quoteremoval (struct wordsplit *wsp)
481{
482 struct wordsplit_node *p;
483 void (*uqfn) (char *, const char *, size_t) =
484 (wsp->ws_flags & WRDSF_CESCAPES) ?
485 wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy;
486
487 for (p = wsp->ws_head; p; p = p->next)
488 {
489 const char *str = wsnode_ptr (wsp, p);
490 size_t slen = wsnode_len (p);
491 int unquote;
492
493 if (wsp->ws_flags & WRDSF_QUOTE)
494 {
495 unquote = !(p->flags & _WSNF_NOEXPAND);
496 }
497 else
498 unquote = 0;
499
500 if (unquote)
501 {
502 if (!(p->flags & _WSNF_WORD))
503 {
504 char *newstr = malloc (slen + 1);
505 if (!newstr)
506 return _wsplt_nomem (wsp);
507 memcpy (newstr, str, slen);
508 newstr[slen] = 0;
509 p->v.word = newstr;
510 p->flags |= _WSNF_WORD;
511 }
512
513 if (wsp->ws_flags & WRDSF_ESCAPE)
514 wordsplit_general_unquote_copy (p->v.word, str, slen,
515 wsp->ws_escape);
516 else
517 uqfn (p->v.word, str, slen);
518 }
519 }
520 return 0;
521}
522
523static int
524wsnode_coalesce (struct wordsplit *wsp)
525{
526 struct wordsplit_node *p;
527
528 for (p = wsp->ws_head; p; p = p->next)
529 {
530 if (p->flags & _WSNF_JOIN)
531 if (coalesce_segment (wsp, p))
532 return 1;
533 }
534 return 0;
535}
536
537static int
538wordsplit_finish (struct wordsplit *wsp)
539{
540 struct wordsplit_node *p;
541 size_t n;
542
543 n = 0;
544
545 for (p = wsp->ws_head; p; p = p->next)
546 n++;
547
548 if (alloc_space (wsp, n + 1))
549 return 1;
550
551 for (p = wsp->ws_head; p; p = p->next)
552 {
553 const char *str = wsnode_ptr (wsp, p);
554 size_t slen = wsnode_len (p);
555 char *newstr = malloc (slen + 1);
556
557 /* Assign newstr first, even if it is NULL. This way
558 wordsplit_free will work even if we return
559 nomem later. */
560 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = newstr;
561 if (!newstr)
562 return _wsplt_nomem (wsp);
563 memcpy (newstr, str, slen);
564 newstr[slen] = 0;
565
566 wsp->ws_wordc++;
567
568 }
569 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
570 return 0;
571}
572
573
574/* Variable expansion */
575static int
576node_split_prefix (struct wordsplit *wsp,
577 struct wordsplit_node **ptail,
578 struct wordsplit_node *node,
579 size_t beg, size_t len, int flg)
580{
581 struct wordsplit_node *newnode;
582
583 if (len == 0)
584 return 0;
585 if (wsnode_new (wsp, &newnode))
586 return 1;
587 wsnode_insert (wsp, newnode, *ptail, 0);
588 if (node->flags & _WSNF_WORD)
589 {
590 const char *str = wsnode_ptr (wsp, node);
591 char *newstr = malloc (len + 1);
592 if (!newstr)
593 return _wsplt_nomem (wsp);
594 memcpy (newstr, str + beg, len);
595 newstr[len] = 0;
596 newnode->flags = _WSNF_WORD;
597 newnode->v.word = newstr;
598 }
599 else
600 {
601 newnode->v.segm.beg = node->v.segm.beg + beg;
602 newnode->v.segm.end = newnode->v.segm.beg + len;
603 }
604 newnode->flags |= flg;
605 *ptail = newnode;
606 return 0;
607}
608
609static int
610find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff)
611{
612 enum
613 { st_init, st_squote, st_dquote } state = st_init;
614 size_t level = 1;
615
616 for (; i < len; i++)
617 {
618 switch (state)
619 {
620 case st_init:
621 switch (str[i])
622 {
623 case '{':
624 level++;
625 break;
626
627 case '}':
628 if (--level == 0)
629 {
630 *poff = i;
631 return 0;
632 }
633 break;
634
635 case '"':
636 state = st_dquote;
637 break;
638
639 case '\'':
640 state = st_squote;
641 break;
642 }
643 break;
644
645 case st_squote:
646 if (str[i] == '\'')
647 state = st_init;
648 break;
649
650 case st_dquote:
651 if (str[i] == '\\')
652 i++;
653 else if (str[i] == '"')
654 state = st_init;
655 break;
656 }
657 }
658 return 1;
659}
660
661static const char *
662wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len)
663{
664 size_t i;
665
666 if (!(wsp->ws_flags & WRDSF_ENV))
667 return NULL;
668
669 if (wsp->ws_flags & WRDSF_ENV_KV)
670 {
671 /* A key-value pair environment */
672 for (i = 0; wsp->ws_env[i]; i++)
673 {
674 size_t elen = strlen (wsp->ws_env[i]);
675 if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0)
676 return wsp->ws_env[i + 1];
677 /* Skip the value. Break the loop if it is NULL. */
678 i++;
679 if (wsp->ws_env[i] == NULL)
680 break;
681 }
682 }
683 else
684 {
685 /* Usual (A=B) environment. */
686 for (i = 0; wsp->ws_env[i]; i++)
687 {
688 size_t j;
689 const char *var = wsp->ws_env[i];
690
691 for (j = 0; j < len; j++)
692 if (name[j] != var[j])
693 break;
694 if (j == len && var[j] == '=')
695 return var + j + 1;
696 }
697 }
698 return NULL;
699}
700
701static int
702expvar (struct wordsplit *wsp, const char *str, size_t len,
703 struct wordsplit_node **ptail, const char **pend, int flg)
704{
705 size_t i = 0;
706 const char *defstr = NULL;
707 const char *value;
708 const char *vptr;
709 struct wordsplit_node *newnode;
710 const char *start = str - 1;
711
712 if (ISALPHA (str[0]) || str[0] == '_')
713 {
714 for (i = 1; i < len; i++)
715 if (!(ISALNUM (str[i]) || str[i] == '_'))
716 break;
717 *pend = str + i - 1;
718 }
719 else if (str[0] == '{')
720 {
721 str++;
722 len--;
723 for (i = 1; i < len; i++)
724 if (str[i] == '}' || str[i] == ':')
725 break;
726 if (str[i] == ':')
727 {
728 size_t j;
729
730 defstr = str + i + 1;
731 if (find_closing_cbrace (str, i + 1, len, &j))
732 {
733 wsp->ws_errno = WRDSE_CBRACE;
734 return 1;
735 }
736 *pend = str + j;
737 }
738 else if (str[i] == '}')
739 {
740 defstr = NULL;
741 *pend = str + i;
742 }
743 else
744 {
745 wsp->ws_errno = WRDSE_CBRACE;
746 return 1;
747 }
748 }
749 else
750 {
751 if (wsnode_new (wsp, &newnode))
752 return 1;
753 wsnode_insert (wsp, newnode, *ptail, 0);
754 *ptail = newnode;
755 newnode->flags = _WSNF_WORD | flg;
756 newnode->v.word = malloc (3);
757 if (!newnode->v.word)
758 return _wsplt_nomem (wsp);
759 newnode->v.word[0] = '$';
760 newnode->v.word[1] = str[0];
761 newnode->v.word[2] = 0;
762 *pend = str;
763 return 0;
764 }
765
766 /* Actually expand the variable */
767 /* str - start of the variable name
768 i - its length
769 defstr - default replacement str */
770
771 vptr = wordsplit_find_env (wsp, str, i);
772 if (vptr)
773 {
774 value = strdup (vptr);
775 if (!value)
776 return _wsplt_nomem (wsp);
777 }
778 else if (wsp->ws_flags & WRDSF_GETVAR)
779 value = wsp->ws_getvar (str, i, wsp->ws_closure);
780 else if (wsp->ws_flags & WRDSF_UNDEF)
781 {
782 wsp->ws_errno = WRDSE_UNDEF;
783 if (wsp->ws_flags & WRDSF_SHOWERR)
784 wordsplit_perror (wsp);
785 return 1;
786 }
787 else
788 {
789 if (wsp->ws_flags & WRDSF_WARNUNDEF)
790 wsp->ws_error (_("warning: undefined variable `%.*s'"), (int) i, str);
791 if (wsp->ws_flags & WRDSF_KEEPUNDEF)
792 value = NULL;
793 else
794 value = "";
795 }
796 /* FIXME: handle defstr */
797 if (value)
798 {
799 if (flg & _WSNF_QUOTE)
800 {
801 if (wsnode_new (wsp, &newnode))
802 return 1;
803 wsnode_insert (wsp, newnode, *ptail, 0);
804 *ptail = newnode;
805 newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
806 newnode->v.word = strdup (value);
807 if (!newnode->v.word)
808 return _wsplt_nomem (wsp);
809 }
810 else if (*value == 0)
811 {
812 /* Empty string is a special case */
813 if (wsnode_new (wsp, &newnode))
814 return 1;
815 wsnode_insert (wsp, newnode, *ptail, 0);
816 *ptail = newnode;
817 newnode->flags = _WSNF_NULL;
818 }
819 else
820 {
821 struct wordsplit ws;
822 int i;
823
824 ws.ws_delim = wsp->ws_delim;
825 if (wordsplit (value, &ws,
826 WRDSF_NOVAR | WRDSF_NOCMD | WRDSF_DELIM | WRDSF_WS))
827 {
828 wordsplit_free (&ws);
829 return 1;
830 }
831 for (i = 0; i < ws.ws_wordc; i++)
832 {
833 if (wsnode_new (wsp, &newnode))
834 return 1;
835 wsnode_insert (wsp, newnode, *ptail, 0);
836 *ptail = newnode;
837 newnode->flags = _WSNF_WORD |
838 _WSNF_NOEXPAND |
839 (i + 1 < ws.ws_wordc ? (flg & ~_WSNF_JOIN) : flg);
840 newnode->v.word = strdup (ws.ws_wordv[i]);
841 if (!newnode->v.word)
842 return _wsplt_nomem (wsp);
843 }
844 wordsplit_free (&ws);
845 }
846 }
847 else if (wsp->ws_flags & WRDSF_KEEPUNDEF)
848 {
849 size_t size = *pend - start + 1;
850
851 if (wsnode_new (wsp, &newnode))
852 return 1;
853 wsnode_insert (wsp, newnode, *ptail, 0);
854 *ptail = newnode;
855 newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
856 newnode->v.word = malloc (size + 1);
857 if (!newnode->v.word)
858 return _wsplt_nomem (wsp);
859 memcpy (newnode->v.word, start, size);
860 newnode->v.word[size] = 0;
861 }
862 else
863 {
864 if (wsnode_new (wsp, &newnode))
865 return 1;
866 wsnode_insert (wsp, newnode, *ptail, 0);
867 *ptail = newnode;
868 newnode->flags = _WSNF_NULL;
869 }
870 return 0;
871}
872
873static int
874node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node)
875{
876 const char *str = wsnode_ptr (wsp, node);
877 size_t slen = wsnode_len (node);
878 const char *end = str + slen;
879 const char *p;
880 size_t off = 0;
881 struct wordsplit_node *tail = node;
882
883 for (p = str; p < end; p++)
884 {
885 if (*p == '\\')
886 {
887 p++;
888 continue;
889 }
890 if (*p == '$')
891 {
892 size_t n = p - str;
893
894 if (tail != node)
895 tail->flags |= _WSNF_JOIN;
896 if (node_split_prefix (wsp, &tail, node, off, n, _WSNF_JOIN))
897 return 1;
898 p++;
899 if (expvar (wsp, p, slen - n, &tail, &p,
900 node->flags & (_WSNF_JOIN | _WSNF_QUOTE)))
901 return 1;
902 off += p - str + 1;
903 str = p + 1;
904 }
905 }
906 if (p > str)
907 {
908 if (tail != node)
909 tail->flags |= _WSNF_JOIN;
910 if (node_split_prefix (wsp, &tail, node, off, p - str,
911 node->flags & _WSNF_JOIN))
912 return 1;
913 }
914 if (tail != node)
915 {
916 wsnode_remove (wsp, node);
917 wsnode_free (node);
918 }
919 return 0;
920}
921
922/* Remove NULL lists */
923static void
924wsnode_nullelim (struct wordsplit *wsp)
925{
926 struct wordsplit_node *p;
927
928 for (p = wsp->ws_head; p;)
929 {
930 struct wordsplit_node *next = p->next;
931 if (p->flags & _WSNF_NULL)
932 {
933 wsnode_remove (wsp, p);
934 wsnode_free (p);
935 }
936 p = next;
937 }
938}
939
940static int
941wordsplit_varexp (struct wordsplit *wsp)
942{
943 struct wordsplit_node *p;
944
945 for (p = wsp->ws_head; p;)
946 {
947 struct wordsplit_node *next = p->next;
948 if (!(p->flags & _WSNF_NOEXPAND))
949 if (node_expand_vars (wsp, p))
950 return 1;
951 p = next;
952 }
953
954 wsnode_nullelim (wsp);
955 return 0;
956}
957
958/* Strip off any leading and trailing whitespace. This function is called
959 right after the initial scanning, therefore it assumes that every
960 node in the list is a text reference node. */
961static void
962wordsplit_trimws (struct wordsplit *wsp)
963{
964 struct wordsplit_node *p;
965
966 for (p = wsp->ws_head; p; p = p->next)
967 {
968 size_t n;
969
970 if (p->flags & _WSNF_QUOTE)
971 continue;
972
973 /* Skip leading whitespace: */
974 for (n = p->v.segm.beg; n < p->v.segm.end && ISWS (wsp->ws_input[n]);
975 n++)
976 ;
977 p->v.segm.beg = n;
978 /* Trim trailing whitespace */
979 for (n = p->v.segm.end;
980 n > p->v.segm.beg && ISWS (wsp->ws_input[n - 1]); n--);
981 p->v.segm.end = n;
982 if (p->v.segm.beg == p->v.segm.end)
983 p->flags |= _WSNF_NULL;
984 }
985
986 wsnode_nullelim (wsp);
987}
988
989static int
990skip_sed_expr (const char *command, size_t i, size_t len)
991{
992 int state;
993
994 do
995 {
996 int delim;
997
998 if (command[i] == ';')
999 i++;
1000 if (!(command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1])))
1001 break;
1002
1003 delim = command[++i];
1004 state = 1;
1005 for (i++; i < len; i++)
1006 {
1007 if (state == 3)
1008 {
1009 if (command[i] == delim || !ISALNUM (command[i]))
1010 break;
1011 }
1012 else if (command[i] == '\\')
1013 i++;
1014 else if (command[i] == delim)
1015 state++;
1016 }
1017 }
1018 while (state == 3 && i < len && command[i] == ';');
1019 return i;
1020}
1021
1022static size_t
1023skip_delim (struct wordsplit *wsp)
1024{
1025 size_t start = wsp->ws_endp;
1026 if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS)
1027 {
1028 if ((wsp->ws_flags & WRDSF_RETURN_DELIMS) &&
1029 ISDELIM (wsp, wsp->ws_input[start]))
1030 {
1031 int delim = wsp->ws_input[start];
1032 do
1033 start++;
1034 while (start < wsp->ws_len && delim == wsp->ws_input[start]);
1035 }
1036 else
1037 {
1038 do
1039 start++;
1040 while (start < wsp->ws_len && ISDELIM (wsp, wsp->ws_input[start]));
1041 }
1042 start--;
1043 }
1044
1045 if (!(wsp->ws_flags & WRDSF_RETURN_DELIMS))
1046 start++;
1047
1048 return start;
1049}
1050
1051#define _WRDS_EOF 0
1052#define _WRDS_OK 1
1053#define _WRDS_ERR 2
1054
1055static int
1056scan_qstring (struct wordsplit *wsp, size_t start, size_t * end)
1057{
1058 size_t j;
1059 const char *command = wsp->ws_input;
1060 size_t len = wsp->ws_len;
1061 char q = command[start];
1062
1063 for (j = start + 1; j < len && command[j] != q; j++)
1064 if (q == '"' && command[j] == '\\')
1065 j++;
1066 if (j < len && command[j] == q)
1067 {
1068 int flags = _WSNF_QUOTE | _WSNF_EMPTYOK;
1069 if (q == '\'')
1070 flags |= _WSNF_NOEXPAND;
1071 if (wordsplit_add_segm (wsp, start + 1, j, flags))
1072 return _WRDS_ERR;
1073 *end = j;
1074 }
1075 else
1076 {
1077 wsp->ws_endp = start;
1078 wsp->ws_errno = WRDSE_QUOTE;
1079 if (wsp->ws_flags & WRDSF_SHOWERR)
1080 wordsplit_perror (wsp);
1081 return _WRDS_ERR;
1082 }
1083 return 0;
1084}
1085
1086static int
1087scan_word (struct wordsplit *wsp, size_t start)
1088{
1089 size_t len = wsp->ws_len;
1090 const char *command = wsp->ws_input;
1091 const char *comment = wsp->ws_comment;
1092 int join = 0;
1093 int flags = 0;
1094
1095 size_t i = start;
1096
1097 if (i >= len)
1098 {
1099 wsp->ws_errno = WRDSE_EOF;
1100 return _WRDS_EOF;
1101 }
1102
1103 start = i;
1104
1105 if (wsp->ws_flags & WRDSF_SED_EXPR
1106 && command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1]))
1107 {
1108 flags = _WSNF_SEXP;
1109 i = skip_sed_expr (command, i, len);
1110 }
1111 else if (!ISDELIM (wsp, command[i]))
1112 {
1113 while (i < len)
1114 {
1115 if (comment && strchr (comment, command[i]) != NULL)
1116 {
1117 size_t j;
1118 for (j = i + 1; j < len && command[j] != '\n'; j++)
1119 ;
1120 if (wordsplit_add_segm (wsp, start, i, 0))
1121 return _WRDS_ERR;
1122 wsp->ws_endp = j;
1123 return _WRDS_OK;
1124 }
1125
1126 if (wsp->ws_flags & WRDSF_QUOTE)
1127 {
1128 if (command[i] == '\\')
1129 {
1130 if (++i == len)
1131 break;
1132 i++;
1133 continue;
1134 }
1135
1136 if (((wsp->ws_flags & WRDSF_SQUOTE) && command[i] == '\'') ||
1137 ((wsp->ws_flags & WRDSF_DQUOTE) && command[i] == '"'))
1138 {
1139 if (join && wsp->ws_tail)
1140 wsp->ws_tail->flags |= _WSNF_JOIN;
1141 if (wordsplit_add_segm (wsp, start, i, _WSNF_JOIN))
1142 return _WRDS_ERR;
1143 if (scan_qstring (wsp, i, &i))
1144 return _WRDS_ERR;
1145 start = i + 1;
1146 join = 1;
1147 }
1148 }
1149
1150 if (ISDELIM (wsp, command[i]))
1151 break;
1152 else
1153 i++;
1154 }
1155 }
1156 else if (wsp->ws_flags & WRDSF_RETURN_DELIMS)
1157 {
1158 i++;
1159 }
1160 else if (!(wsp->ws_flags & WRDSF_SQUEEZE_DELIMS))
1161 flags |= _WSNF_EMPTYOK;
1162
1163 if (join && i > start && wsp->ws_tail)
1164 wsp->ws_tail->flags |= _WSNF_JOIN;
1165 if (wordsplit_add_segm (wsp, start, i, flags))
1166 return _WRDS_ERR;
1167 wsp->ws_endp = i;
1168 if (wsp->ws_flags & WRDSF_INCREMENTAL)
1169 return _WRDS_EOF;
1170 return _WRDS_OK;
1171}
1172
1173static char quote_transtab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";
1174
1175int
1176wordsplit_c_unquote_char (int c)
1177{
1178 char *p;
1179
1180 for (p = quote_transtab; *p; p += 2)
1181 {
1182 if (*p == c)
1183 return p[1];
1184 }
1185 return c;
1186}
1187
1188int
1189wordsplit_c_quote_char (int c)
1190{
1191 char *p;
1192
1193 for (p = quote_transtab + sizeof (quote_transtab) - 2;
1194 p > quote_transtab; p -= 2)
1195 {
1196 if (*p == c)
1197 return p[-1];
1198 }
1199 return -1;
1200}
1201
1202#define to_num(c) \
1203 (ISDIGIT(c) ? c - '0' : (ISXDIGIT(c) ? toupper(c) - 'A' + 10 : 255 ))
1204
1205static int
1206xtonum (int *pval, const char *src, int base, int cnt)
1207{
1208 int i, val;
1209
1210 for (i = 0, val = 0; i < cnt; i++, src++)
1211 {
1212 int n = *(unsigned char *) src;
1213 if (n > 127 || (n = to_num (n)) >= base)
1214 break;
1215 val = val * base + n;
1216 }
1217 *pval = val;
1218 return i;
1219}
1220
1221size_t
1222wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
1223{
1224 size_t len = 0;
1225
1226 *quote = 0;
1227 for (; *str; str++)
1228 {
1229 if (strchr (" \"", *str))
1230 *quote = 1;
1231
1232 if (*str == ' ')
1233 len++;
1234 else if (*str == '"')
1235 len += 2;
1236 else if (*str != '\t' && *str != '\\' && ISPRINT (*str))
1237 len++;
1238 else if (quote_hex)
1239 len += 3;
1240 else
1241 {
1242 if (wordsplit_c_quote_char (*str) != -1)
1243 len += 2;
1244 else
1245 len += 4;
1246 }
1247 }
1248 return len;
1249}
1250
1251void
1252wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
1253 const char *escapable)
1254{
1255 int i;
1256
1257 for (i = 0; i < n;)
1258 {
1259 if (src[i] == '\\' && i < n && strchr (escapable, src[i + 1]))
1260 i++;
1261 *dst++ = src[i++];
1262 }
1263 *dst = 0;
1264}
1265
1266void
1267wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n)
1268{
1269 int i;
1270
1271 for (i = 0; i < n;)
1272 {
1273 if (src[i] == '\\')
1274 i++;
1275 *dst++ = src[i++];
1276 }
1277 *dst = 0;
1278}
1279
1280void
1281wordsplit_c_unquote_copy (char *dst, const char *src, size_t n)
1282{
1283 int i = 0;
1284 int c;
1285
1286 while (i < n)
1287 {
1288 if (src[i] == '\\')
1289 {
1290 ++i;
1291 if (src[i] == 'x' || src[i] == 'X')
1292 {
1293 if (n - i < 2)
1294 {
1295 *dst++ = '\\';
1296 *dst++ = src[i++];
1297 }
1298 else
1299 {
1300 int off = xtonum (&c, src + i + 1,
1301 16, 2);
1302 if (off == 0)
1303 {
1304 *dst++ = '\\';
1305 *dst++ = src[i++];
1306 }
1307 else
1308 {
1309 *dst++ = c;
1310 i += off + 1;
1311 }
1312 }
1313 }
1314 else if ((unsigned char) src[i] < 128 && ISDIGIT (src[i]))
1315 {
1316 if (n - i < 1)
1317 {
1318 *dst++ = '\\';
1319 *dst++ = src[i++];
1320 }
1321 else
1322 {
1323 int off = xtonum (&c, src + i, 8, 3);
1324 if (off == 0)
1325 {
1326 *dst++ = '\\';
1327 *dst++ = src[i++];
1328 }
1329 else
1330 {
1331 *dst++ = c;
1332 i += off;
1333 }
1334 }
1335 }
1336 else
1337 *dst++ = wordsplit_c_unquote_char (src[i++]);
1338 }
1339 else
1340 *dst++ = src[i++];
1341 }
1342 *dst = 0;
1343}
1344
1345void
1346wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
1347{
1348 for (; *src; src++)
1349 {
1350 if (*src == '"')
1351 {
1352 *dst++ = '\\';
1353 *dst++ = *src;
1354 }
1355 else if (*src != '\t' && *src != '\\' && ISPRINT (*src))
1356 *dst++ = *src;
1357 else
1358 {
1359 char tmp[4];
1360
1361 if (quote_hex)
1362 {
1363 snprintf (tmp, sizeof tmp, "%%%02X", *(unsigned char *) src);
1364 memcpy (dst, tmp, 3);
1365 dst += 3;
1366 }
1367 else
1368 {
1369 int c = wordsplit_c_quote_char (*src);
1370 *dst++ = '\\';
1371 if (c != -1)
1372 *dst++ = c;
1373 else
1374 {
1375 snprintf (tmp, sizeof tmp, "%03o", *(unsigned char *) src);
1376 memcpy (dst, tmp, 3);
1377 dst += 3;
1378 }
1379 }
1380 }
1381 }
1382}
1383
1384static int
1385wordsplit_process_list (struct wordsplit *wsp, size_t start)
1386{
1387 if (wsp->ws_flags & WRDSF_NOSPLIT)
1388 {
1389 /* Treat entire input as a quoted argument */
1390 if (wordsplit_add_segm (wsp, start, wsp->ws_len, _WSNF_QUOTE))
1391 return wsp->ws_errno;
1392 }
1393 else
1394 {
1395 int rc;
1396
1397 while ((rc = scan_word (wsp, start)) == _WRDS_OK)
1398 start = skip_delim (wsp);
1399 /* Make sure tail element is not joinable */
1400 if (wsp->ws_tail)
1401 wsp->ws_tail->flags &= ~_WSNF_JOIN;
1402 if (rc == _WRDS_ERR)
1403 return wsp->ws_errno;
1404 }
1405
1406 if (wsp->ws_flags & WRDSF_SHOWDBG)
1407 {
1408 wsp->ws_debug ("Initial list:");
1409 wordsplit_dump_nodes (wsp);
1410 }
1411
1412 if (wsp->ws_flags & WRDSF_WS)
1413 {
1414 /* Trim leading and trailing whitespace */
1415 wordsplit_trimws (wsp);
1416 if (wsp->ws_flags & WRDSF_SHOWDBG)
1417 {
1418 wsp->ws_debug ("After WS trimming:");
1419 wordsplit_dump_nodes (wsp);
1420 }
1421 }
1422
1423 /* Expand variables (FIXME: & commands) */
1424 if (!(wsp->ws_flags & WRDSF_NOVAR))
1425 {
1426 if (wordsplit_varexp (wsp))
1427 {
1428 wordsplit_free_nodes (wsp);
1429 return wsp->ws_errno;
1430 }
1431 if (wsp->ws_flags & WRDSF_SHOWDBG)
1432 {
1433 wsp->ws_debug ("Expanded list:");
1434 wordsplit_dump_nodes (wsp);
1435 }
1436 }
1437
1438 do
1439 {
1440 if (wsnode_quoteremoval (wsp))
1441 break;
1442 if (wsp->ws_flags & WRDSF_SHOWDBG)
1443 {
1444 wsp->ws_debug ("After quote removal:");
1445 wordsplit_dump_nodes (wsp);
1446 }
1447
1448 if (wsnode_coalesce (wsp))
1449 break;
1450
1451 if (wsp->ws_flags & WRDSF_SHOWDBG)
1452 {
1453 wsp->ws_debug ("Coalesced list:");
1454 wordsplit_dump_nodes (wsp);
1455 }
1456 }
1457 while (0);
1458 return wsp->ws_errno;
1459}
1460
1461int
1462wordsplit_len (const char *command, size_t length, struct wordsplit *wsp,
1463 int flags)
1464{
1465 int rc;
1466 size_t start;
1467 const char *cmdptr;
1468 size_t cmdlen;
1469
1470 if (!command)
1471 {
1472 if (!(flags & WRDSF_INCREMENTAL))
1473 return EINVAL;
1474
1475 start = skip_delim (wsp);
1476 if (wsp->ws_endp == wsp->ws_len)
1477 {
1478 wsp->ws_errno = WRDSE_NOINPUT;
1479 if (wsp->ws_flags & WRDSF_SHOWERR)
1480 wordsplit_perror (wsp);
1481 return wsp->ws_errno;
1482 }
1483
1484 cmdptr = wsp->ws_input + wsp->ws_endp;
1485 cmdlen = wsp->ws_len - wsp->ws_endp;
1486 wsp->ws_flags |= WRDSF_REUSE;
1487 wordsplit_init0 (wsp);
1488 }
1489 else
1490 {
1491 cmdptr = command;
1492 cmdlen = length;
1493 start = 0;
1494 rc = wordsplit_init (wsp, cmdptr, cmdlen, flags);
1495 if (rc)
1496 return rc;
1497 }
1498
1499 if (wsp->ws_flags & WRDSF_SHOWDBG)
1500 wsp->ws_debug ("Input:%.*s;", (int) cmdlen, cmdptr);
1501
1502 rc = wordsplit_process_list (wsp, start);
1503 if (rc == 0 && (flags & WRDSF_INCREMENTAL))
1504 {
1505 while (!wsp->ws_head && wsp->ws_endp < wsp->ws_len)
1506 {
1507 start = skip_delim (wsp);
1508 if (wsp->ws_flags & WRDSF_SHOWDBG)
1509 {
1510 cmdptr = wsp->ws_input + wsp->ws_endp;
1511 cmdlen = wsp->ws_len - wsp->ws_endp;
1512 wsp->ws_debug ("Restart:%.*s;", (int) cmdlen, cmdptr);
1513 }
1514 rc = wordsplit_process_list (wsp, start);
1515 if (rc)
1516 break;
1517 }
1518 }
1519 if (rc)
1520 {
1521 wordsplit_free_nodes (wsp);
1522 return rc;
1523 }
1524 wordsplit_finish (wsp);
1525 wordsplit_free_nodes (wsp);
1526 return wsp->ws_errno;
1527}
1528
1529int
1530wordsplit (const char *command, struct wordsplit *ws, int flags)
1531{
1532 return wordsplit_len (command, command ? strlen (command) : 0, ws,
1533 flags);
1534}
1535
1536void
1537wordsplit_free_words (struct wordsplit *ws)
1538{
1539 size_t i;
1540
1541 for (i = 0; i < ws->ws_wordc; i++)
1542 {
1543 char *p = ws->ws_wordv[ws->ws_offs + i];
1544 if (p)
1545 {
1546 free (p);
1547 ws->ws_wordv[ws->ws_offs + i] = NULL;
1548 }
1549 }
1550 ws->ws_wordc = 0;
1551}
1552
1553void
1554wordsplit_free (struct wordsplit *ws)
1555{
1556 wordsplit_free_words (ws);
1557 free (ws->ws_wordv);
1558 ws->ws_wordv = NULL;
1559}
1560
1561void
1562wordsplit_perror (struct wordsplit *wsp)
1563{
1564 switch (wsp->ws_errno)
1565 {
1566 case WRDSE_EOF:
1567 wsp->ws_error (_("no error"));
1568 break;
1569
1570 case WRDSE_QUOTE:
1571 wsp->ws_error (_("missing closing %c (start near #%lu)"),
1572 wsp->ws_input[wsp->ws_endp],
1573 (unsigned long) wsp->ws_endp);
1574 break;
1575
1576 case WRDSE_NOSPACE:
1577 wsp->ws_error (_("memory exhausted"));
1578 break;
1579
1580 case WRDSE_NOSUPP:
1581 wsp->ws_error (_("command substitution is not yet supported"));
1582 break;
1583
1584 case WRDSE_USAGE:
1585 wsp->ws_error (_("invalid wordsplit usage"));
1586 break;
1587
1588 case WRDSE_CBRACE:
1589 wsp->ws_error (_("unbalanced curly brace"));
1590 break;
1591
1592 case WRDSE_UNDEF:
1593 wsp->ws_error (_("undefined variable"));
1594 break;
1595
1596 case WRDSE_NOINPUT:
1597 wsp->ws_error (_("input exhausted"));
1598 break;
1599
1600 default:
1601 wsp->ws_error (_("unknown error"));
1602 }
1603}
1604
1605const char *_wordsplit_errstr[] = {
1606 N_("no error"),
1607 N_("missing closing quote"),
1608 N_("memory exhausted"),
1609 N_("command substitution is not yet supported"),
1610 N_("invalid wordsplit usage"),
1611 N_("unbalanced curly brace"),
1612 N_("undefined variable"),
1613 N_("input exhausted")
1614};
1615int _wordsplit_nerrs =
1616 sizeof (_wordsplit_errstr) / sizeof (_wordsplit_errstr[0]);
1617
1618const char *
1619wordsplit_strerror (struct wordsplit *ws)
1620{
1621 if (ws->ws_errno < _wordsplit_nerrs)
1622 return _wordsplit_errstr[ws->ws_errno];
1623 return N_("unknown error");
1624}
diff --git a/src/wordsplit.h b/src/wordsplit.h
new file mode 100644
index 0000000..35e125a
--- a/dev/null
+++ b/src/wordsplit.h
@@ -0,0 +1,159 @@
1/* wordsplit - a word splitter
2 Copyright (C) 2009-2012 Sergey Poznyakoff
3
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 3 of the License, or (at your
7 option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along
15 with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17#ifndef __WORDSPLIT_H
18#define __WORDSPLIT_H
19
20#include <stddef.h>
21
22struct wordsplit
23{
24 size_t ws_wordc;
25 char **ws_wordv;
26 size_t ws_offs;
27 size_t ws_wordn;
28 int ws_flags;
29 const char *ws_delim;
30 const char *ws_comment;
31 const char *ws_escape;
32 void (*ws_alloc_die) (struct wordsplit * wsp);
33 void (*ws_error) (const char *, ...)
34 __attribute__ ((__format__ (__printf__, 1, 2)));
35 void (*ws_debug) (const char *, ...)
36 __attribute__ ((__format__ (__printf__, 1, 2)));
37
38 const char **ws_env;
39 const char *(*ws_getvar) (const char *, size_t, void *);
40 void *ws_closure;
41
42 const char *ws_input;
43 size_t ws_len;
44 size_t ws_endp;
45 int ws_errno;
46 struct wordsplit_node *ws_head, *ws_tail;
47};
48
49/* Wordsplit flags. Only 2 bits of a 32-bit word remain unused.
50 It is getting crowded... */
51/* Append the words found to the array resulting from a previous
52 call. */
53#define WRDSF_APPEND 0x00000001
54/* Insert we_offs initial NULLs in the array ws_wordv.
55 (These are not counted in the returned ws_wordc.) */
56#define WRDSF_DOOFFS 0x00000002
57/* Don't do command substitution. Reserved for future use. */
58#define WRDSF_NOCMD 0x00000004
59/* The parameter p resulted from a previous call to
60 wordsplit(), and wordsplit_free() was not called. Reuse the
61 allocated storage. */
62#define WRDSF_REUSE 0x00000008
63/* Print errors */
64#define WRDSF_SHOWERR 0x00000010
65/* Consider it an error if an undefined shell variable
66 is expanded. */
67#define WRDSF_UNDEF 0x00000020
68
69/* Don't do variable expansion. */
70#define WRDSF_NOVAR 0x00000040
71/* Abort on ENOMEM error */
72#define WRDSF_ENOMEMABRT 0x00000080
73/* Trim off any leading and trailind whitespace */
74#define WRDSF_WS 0x00000100
75/* Handle single quotes */
76#define WRDSF_SQUOTE 0x00000200
77/* Handle double quotes */
78#define WRDSF_DQUOTE 0x00000400
79/* Handle quotes and escape directives */
80#define WRDSF_QUOTE (WRDSF_SQUOTE|WRDSF_DQUOTE)
81/* Replace each input sequence of repeated delimiters with a single
82 delimiter */
83#define WRDSF_SQUEEZE_DELIMS 0x00000800
84/* Return delimiters */
85#define WRDSF_RETURN_DELIMS 0x00001000
86/* Treat sed expressions as words */
87#define WRDSF_SED_EXPR 0x00002000
88/* ws_delim field is initialized */
89#define WRDSF_DELIM 0x00004000
90/* ws_comment field is initialized */
91#define WRDSF_COMMENT 0x00008000
92/* ws_alloc_die field is initialized */
93#define WRDSF_ALLOC_DIE 0x00010000
94/* ws_error field is initialized */
95#define WRDSF_ERROR 0x00020000
96/* ws_debug field is initialized */
97#define WRDSF_DEBUG 0x00040000
98/* ws_env field is initialized */
99#define WRDSF_ENV 0x00080000
100/* ws_getvar field is initialized */
101#define WRDSF_GETVAR 0x00100000
102/* enable debugging */
103#define WRDSF_SHOWDBG 0x00200000
104/* Don't split input into words. Useful for side effects. */
105#define WRDSF_NOSPLIT 0x00400000
106/* Keep undefined variables in place, instead of expanding them to
107 empty string */
108#define WRDSF_KEEPUNDEF 0x00800000
109/* Warn about undefined variables */
110#define WRDSF_WARNUNDEF 0x01000000
111/* Handle C escapes */
112#define WRDSF_CESCAPES 0x02000000
113
114/* ws_closure is set */
115#define WRDSF_CLOSURE 0x04000000
116/* ws_env is a Key/Value environment, i.e. the value of a variable is
117 stored in the element that follows its name. */
118#define WRDSF_ENV_KV 0x08000000
119
120/* ws_escape is set */
121#define WRDSF_ESCAPE 0x10000000
122
123/* Incremental mode */
124#define WRDSF_INCREMENTAL 0x20000000
125
126 #define WRDSF_DEFFLAGS \
127 (WRDSF_NOVAR | WRDSF_NOCMD | \
128 WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_CESCAPES)
129
130#define WRDSE_EOF 0
131#define WRDSE_QUOTE 1
132#define WRDSE_NOSPACE 2
133#define WRDSE_NOSUPP 3
134#define WRDSE_USAGE 4
135#define WRDSE_CBRACE 5
136#define WRDSE_UNDEF 6
137#define WRDSE_NOINPUT 7
138
139int wordsplit (const char *s, struct wordsplit *p, int flags);
140int wordsplit_len (const char *s, size_t len,
141 struct wordsplit *p, int flags);
142void wordsplit_free (struct wordsplit *p);
143void wordsplit_free_words (struct wordsplit *ws);
144
145int wordsplit_c_unquote_char (int c);
146int wordsplit_c_quote_char (int c);
147size_t wordsplit_c_quoted_length (const char *str, int quote_hex,
148 int *quote);
149void wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
150 const char *escapable);
151void wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n);
152void wordsplit_c_unquote_copy (char *dst, const char *src, size_t n);
153void wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex);
154
155void wordsplit_perror (struct wordsplit *ws);
156const char *wordsplit_strerror (struct wordsplit *ws);
157
158
159#endif

Return to:

Send suggestions and report system problems to the System administrator.