summaryrefslogtreecommitdiffabout
path: root/src/wordsplit.c
Unidiff
Diffstat (limited to 'src/wordsplit.c') (more/less context) (ignore whitespace changes)
-rw-r--r--src/wordsplit.c2892
1 files changed, 0 insertions, 2892 deletions
diff --git a/src/wordsplit.c b/src/wordsplit.c
deleted file mode 100644
index 9179a87..0000000
--- a/src/wordsplit.c
+++ b/dev/null
@@ -1,2892 +0,0 @@
1/* wordsplit - a word splitter
2 Copyright (C) 2009-2019 Sergey Poznyakoff
3
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 3 of the License, or (at your
7 option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along
15 with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17#ifdef HAVE_CONFIG_H
18# include <config.h>
19#endif
20
21#include <errno.h>
22#include <ctype.h>
23#include <unistd.h>
24#include <stdlib.h>
25#include <string.h>
26#include <stdio.h>
27#include <stdarg.h>
28#include <pwd.h>
29#include <glob.h>
30#include <limits.h>
31
32#if ENABLE_NLS
33# include <gettext.h>
34#else
35# define gettext(msgid) msgid
36#endif
37#define _(msgid) gettext (msgid)
38#define N_(msgid) msgid
39
40#include <wordsplit.h>
41
/* Locale-independent (ASCII) character classification helpers.

   These are macros and most of them evaluate their argument more than
   once: never pass an expression with side effects.

   ISDELIM and ISPUNCT are implemented with strchr, which also matches the
   terminating NUL of the set; they therefore hold for c == 0. */
#define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n')
#define ISDELIM(ws,c) \
  (strchr ((ws)->ws_delim, (c)) != NULL)
#define ISPUNCT(c) (strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL)
#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z')
#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z')
#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c))
#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9')
/* True for the hexadecimal *letters* only (a-f, A-F); decimal digits are
   tested separately with ISDIGIT.  The explicit NUL test is needed because
   strchr would otherwise report a match on the string terminator. */
#define ISXDIGIT(c) ((c) != 0 && strchr("abcdefABCDEF", (c)) != NULL)
#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c))
#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127)

/* First and subsequent characters of a variable name. */
#define ISVARBEG(c) (ISALPHA(c) || (c) == '_')
#define ISVARCHR(c) (ISALNUM(c) || (c) == '_')

/* True if delimiter nodes must be kept in the node list. */
#define WSP_RETURN_DELIMS(wsp) \
  (((wsp)->ws_flags & WRDSF_RETURN_DELIMS) \
   || ((wsp)->ws_options & WRDSO_MAXWORDS))

/* Numeric value of the digit C in bases up to 16, or 255 if C is not a
   decimal digit or hexadecimal letter. */
#define to_num(c) \
  (ISDIGIT(c) ? (c) - '0' \
   : (ISXDIGIT(c) ? toupper((unsigned char) (c)) - 'A' + 10 : 255))

/* Initial size and growth increment (in elements) of the word vector. */
#define ALLOC_INIT 128
#define ALLOC_INCR 128
65
66static void
67_wsplt_alloc_die (struct wordsplit *wsp)
68{
69 wsp->ws_error ("%s", _("memory exhausted"));
70 abort ();
71}
72
/* Default error reporter: print the printf-style message FMT on
   standard error, followed by a newline. */
static void
_wsplt_error (const char *fmt, ...)
{
  va_list args;

  va_start (args, fmt);
  vfprintf (stderr, fmt, args);
  va_end (args);
  putc ('\n', stderr);
}
83
84static void wordsplit_free_nodes (struct wordsplit *);
85
86static int
87_wsplt_seterr (struct wordsplit *wsp, int ec)
88{
89 wsp->ws_errno = ec;
90 if (wsp->ws_flags & WRDSF_SHOWERR)
91 wordsplit_perror (wsp);
92 return ec;
93}
94
95static int
96_wsplt_nomem (struct wordsplit *wsp)
97{
98 errno = ENOMEM;
99 wsp->ws_errno = WRDSE_NOSPACE;
100 if (wsp->ws_flags & WRDSF_ENOMEMABRT)
101 wsp->ws_alloc_die (wsp);
102 if (wsp->ws_flags & WRDSF_SHOWERR)
103 wordsplit_perror (wsp);
104 if (!(wsp->ws_flags & WRDSF_REUSE))
105 wordsplit_free (wsp);
106 wordsplit_free_nodes (wsp);
107 return wsp->ws_errno;
108}
109
110static void
111_wsplt_store_errctx (struct wordsplit *wsp, char const *str, size_t len)
112{
113 free (wsp->ws_errctx);
114 wsp->ws_errctx = malloc (len + 1);
115 if (!wsp->ws_errctx)
116 {
117 wsp->ws_error ("%s",
118 _("memory exhausted while trying to store error context"));
119 }
120 else
121 {
122 memcpy (wsp->ws_errctx, str, len);
123 wsp->ws_errctx[len] = 0;
124 }
125}
126
/* Set the error code EC in WSP together with its context: the first
   LEN bytes of STR.  Returns EC. */
static inline int
_wsplt_setctxerr (struct wordsplit *wsp, int ec, char const *str, size_t len)
{
  int rc;

  _wsplt_store_errctx (wsp, str, len);
  rc = _wsplt_seterr (wsp, ec);
  return rc;
}
133
134static int wordsplit_run (const char *command, size_t length,
135 struct wordsplit *wsp,
136 int flags, int lvl);
137
138static int wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
139 int flags);
140static int wordsplit_process_list (struct wordsplit *wsp, size_t start);
141static int wordsplit_finish (struct wordsplit *wsp);
142
143static int
144_wsplt_subsplit (struct wordsplit *wsp, struct wordsplit *wss,
145 char const *str, int len,
146 int flags, int finalize)
147{
148 int rc;
149
150 wss->ws_delim = wsp->ws_delim;
151 wss->ws_debug = wsp->ws_debug;
152 wss->ws_error = wsp->ws_error;
153 wss->ws_alloc_die = wsp->ws_alloc_die;
154
155 if (!(flags & WRDSF_NOVAR))
156 {
157 wss->ws_env = wsp->ws_env;
158 wss->ws_getvar = wsp->ws_getvar;
159 flags |= wsp->ws_flags & (WRDSF_ENV | WRDSF_ENV_KV | WRDSF_GETVAR);
160 }
161 if (!(flags & WRDSF_NOCMD))
162 {
163 wss->ws_command = wsp->ws_command;
164 }
165
166 if ((flags & (WRDSF_NOVAR|WRDSF_NOCMD)) != (WRDSF_NOVAR|WRDSF_NOCMD))
167 {
168 wss->ws_closure = wsp->ws_closure;
169 flags |= wsp->ws_flags & WRDSF_CLOSURE;
170 }
171
172 wss->ws_options = wsp->ws_options;
173
174 flags |= WRDSF_DELIM
175 | WRDSF_ALLOC_DIE
176 | WRDSF_ERROR
177 | WRDSF_DEBUG
178 | (wsp->ws_flags & (WRDSF_SHOWDBG | WRDSF_SHOWERR | WRDSF_OPTIONS));
179
180 rc = wordsplit_init (wss, str, len, flags);
181 if (rc)
182 return rc;
183 wss->ws_lvl = wsp->ws_lvl + 1;
184 rc = wordsplit_process_list (wss, 0);
185 if (rc)
186 {
187 wordsplit_free_nodes (wss);
188 return rc;
189 }
190 if (finalize)
191 {
192 rc = wordsplit_finish (wss);
193 wordsplit_free_nodes (wss);
194 }
195 return rc;
196}
197
198static void
199_wsplt_seterr_sub (struct wordsplit *wsp, struct wordsplit *wss)
200{
201 /* Clear user-defined error */
202 if (wsp->ws_errno == WRDSE_USERERR)
203 free (wsp->ws_usererr);
204 /* Copy error state */
205 wsp->ws_errno = wss->ws_errno;
206 if (wss->ws_errno == WRDSE_USERERR)
207 {
208 wsp->ws_usererr = wss->ws_usererr;
209 wss->ws_errno = WRDSE_EOF;
210 wss->ws_usererr = NULL;
211 }
212 /* Copy error context */
213 free (wsp->ws_errctx);
214 wsp->ws_errctx = wss->ws_errctx;
215 wss->ws_errctx = NULL;
216}
217
218static void
219wordsplit_init0 (struct wordsplit *wsp)
220{
221 if (wsp->ws_flags & WRDSF_REUSE)
222 {
223 if (!(wsp->ws_flags & WRDSF_APPEND))
224 wordsplit_free_words (wsp);
225 wordsplit_clearerr (wsp);
226 }
227 else
228 {
229 wsp->ws_wordv = NULL;
230 wsp->ws_wordc = 0;
231 wsp->ws_wordn = 0;
232 }
233
234 wsp->ws_errno = 0;
235}
236
/* C escape translation table.  The string is a sequence of character
   pairs: the character that follows a backslash in the input, and the
   character it stands for. */
char wordsplit_c_escape_tab[] =
  "\\\\"			/* backslash */
  "\"\""			/* double quote */
  "a\a"
  "b\b"
  "f\f"
  "n\n"
  "r\r"
  "t\t"
  "v\v";
238
239static int
240wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
241 int flags)
242{
243 wsp->ws_flags = flags;
244
245 if (!(wsp->ws_flags & WRDSF_ALLOC_DIE))
246 wsp->ws_alloc_die = _wsplt_alloc_die;
247 if (!(wsp->ws_flags & WRDSF_ERROR))
248 wsp->ws_error = _wsplt_error;
249
250 if (!(wsp->ws_flags & WRDSF_NOVAR))
251 {
252 /* These will be initialized on first variable assignment */
253 wsp->ws_envidx = wsp->ws_envsiz = 0;
254 wsp->ws_envbuf = NULL;
255 }
256
257 if (!(wsp->ws_flags & WRDSF_NOCMD))
258 {
259 if (!wsp->ws_command)
260 {
261 _wsplt_seterr (wsp, WRDSE_USAGE);
262 errno = EINVAL;
263 return wsp->ws_errno;
264 }
265 }
266
267 if (wsp->ws_flags & WRDSF_SHOWDBG)
268 {
269 if (!(wsp->ws_flags & WRDSF_DEBUG))
270 {
271 if (wsp->ws_flags & WRDSF_ERROR)
272 wsp->ws_debug = wsp->ws_error;
273 else if (wsp->ws_flags & WRDSF_SHOWERR)
274 wsp->ws_debug = _wsplt_error;
275 else
276 wsp->ws_flags &= ~WRDSF_SHOWDBG;
277 }
278 }
279
280 wsp->ws_input = input;
281 wsp->ws_len = len;
282
283 if (!(wsp->ws_flags & WRDSF_DOOFFS))
284 wsp->ws_offs = 0;
285
286 if (!(wsp->ws_flags & WRDSF_DELIM))
287 wsp->ws_delim = " \t\n";
288
289 wsp->ws_sep[0] = wsp->ws_delim[0];
290 wsp->ws_sep[1] = 0;
291
292 if (!(wsp->ws_flags & WRDSF_COMMENT))
293 wsp->ws_comment = NULL;
294
295 if (!(wsp->ws_flags & WRDSF_CLOSURE))
296 wsp->ws_closure = NULL;
297
298 if (!(wsp->ws_flags & WRDSF_OPTIONS))
299 wsp->ws_options = 0;
300
301 if (wsp->ws_flags & WRDSF_ESCAPE)
302 {
303 if (!wsp->ws_escape[WRDSX_WORD])
304 wsp->ws_escape[WRDSX_WORD] = "";
305 if (!wsp->ws_escape[WRDSX_QUOTE])
306 wsp->ws_escape[WRDSX_QUOTE] = "";
307 }
308 else
309 {
310 if (wsp->ws_flags & WRDSF_CESCAPES)
311 {
312 wsp->ws_escape[WRDSX_WORD] = wordsplit_c_escape_tab;
313 wsp->ws_escape[WRDSX_QUOTE] = wordsplit_c_escape_tab;
314 wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD
315 | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD;
316 }
317 else
318 {
319 wsp->ws_escape[WRDSX_WORD] = "";
320 wsp->ws_escape[WRDSX_QUOTE] = "\\\\\"\"";
321 wsp->ws_options |= WRDSO_BSKEEP_QUOTE;
322 }
323 }
324
325 if (!(wsp->ws_options & WRDSO_PARAMV))
326 {
327 wsp->ws_paramv = NULL;
328 wsp->ws_paramc = 0;
329 }
330 wsp->ws_paramidx = wsp->ws_paramsiz = 0;
331 wsp->ws_parambuf = NULL;
332
333 wsp->ws_endp = 0;
334 wsp->ws_wordi = 0;
335
336 if (wsp->ws_flags & WRDSF_REUSE)
337 wordsplit_free_nodes (wsp);
338 wsp->ws_head = wsp->ws_tail = NULL;
339
340 wsp->ws_errctx = NULL;
341
342 wordsplit_init0 (wsp);
343
344 return 0;
345}
346
347static int
348alloc_space (struct wordsplit *wsp, size_t count)
349{
350 size_t offs = (wsp->ws_flags & WRDSF_DOOFFS) ? wsp->ws_offs : 0;
351 char **ptr;
352 size_t newalloc;
353
354 if (wsp->ws_wordv == NULL)
355 {
356 newalloc = offs + count > ALLOC_INIT ? count : ALLOC_INIT;
357 ptr = calloc (newalloc, sizeof (ptr[0]));
358 }
359 else if (wsp->ws_wordn < offs + wsp->ws_wordc + count)
360 {
361 newalloc = offs + wsp->ws_wordc +
362 (count > ALLOC_INCR ? count : ALLOC_INCR);
363 ptr = realloc (wsp->ws_wordv, newalloc * sizeof (ptr[0]));
364 }
365 else
366 return 0;
367
368 if (ptr)
369 {
370 wsp->ws_wordn = newalloc;
371 wsp->ws_wordv = ptr;
372 }
373 else
374 return _wsplt_nomem (wsp);
375 return 0;
376}
377
378
/* Node state flags */
#define _WSNF_NULL     (1 << 0)	/* null node (a noop) */
#define _WSNF_WORD     (1 << 1)	/* node contains word in v.word */
#define _WSNF_QUOTE    (1 << 2)	/* text is quoted */
#define _WSNF_NOEXPAND (1 << 3)	/* text is not subject to expansion */
#define _WSNF_JOIN     (1 << 4)	/* node must be joined with the next node */
#define _WSNF_SEXP     (1 << 5)	/* is a sed expression */
#define _WSNF_DELIM    (1 << 6)	/* node is a delimiter */
#define _WSNF_CONST    (1 << 7)	/* with _WSNF_WORD: v.word is constant */
/* Special flag indicating that wordsplit_add_segm must add the segment
   even if it is empty.  It is never stored in a node. */
#define _WSNF_EMPTYOK  (1 << 8)

/* An element of the doubly-linked list of input fragments.  Depending
   on the _WSNF_WORD flag, the text of the node is either the string
   v.word, or the segment [v.segm.beg, v.segm.end) of ws_input. */
struct wordsplit_node
{
  struct wordsplit_node *prev;	/* Previous element */
  struct wordsplit_node *next;	/* Next element */
  int flags;			/* Node flags */
  union
  {
    struct
    {
      size_t beg;		/* Start of word in ws_input */
      size_t end;		/* End of word in ws_input */
    } segm;
    char *word;
  } v;
};
407
408static const char *
409wsnode_flagstr (int flags)
410{
411 static char retbuf[7];
412 char *p = retbuf;
413
414 if (flags & _WSNF_WORD)
415 *p++ = 'w';
416 else if (flags & _WSNF_NULL)
417 *p++ = 'n';
418 else
419 *p++ = '-';
420 if (flags & _WSNF_QUOTE)
421 *p++ = 'q';
422 else
423 *p++ = '-';
424 if (flags & _WSNF_NOEXPAND)
425 *p++ = 'E';
426 else
427 *p++ = '-';
428 if (flags & _WSNF_JOIN)
429 *p++ = 'j';
430 else
431 *p++ = '-';
432 if (flags & _WSNF_SEXP)
433 *p++ = 's';
434 else
435 *p++ = '-';
436 if (flags & _WSNF_DELIM)
437 *p++ = 'd';
438 else
439 *p++ = '-';
440 *p = 0;
441 return retbuf;
442}
443
444static const char *
445wsnode_ptr (struct wordsplit *wsp, struct wordsplit_node *p)
446{
447 if (p->flags & _WSNF_NULL)
448 return "";
449 else if (p->flags & _WSNF_WORD)
450 return p->v.word;
451 else
452 return wsp->ws_input + p->v.segm.beg;
453}
454
455static size_t
456wsnode_len (struct wordsplit_node *p)
457{
458 if (p->flags & _WSNF_NULL)
459 return 0;
460 else if (p->flags & _WSNF_WORD)
461 return strlen (p->v.word);
462 else
463 return p->v.segm.end - p->v.segm.beg;
464}
465
466static int
467wsnode_new (struct wordsplit *wsp, struct wordsplit_node **pnode)
468{
469 struct wordsplit_node *node = calloc (1, sizeof (*node));
470 if (!node)
471 return _wsplt_nomem (wsp);
472 *pnode = node;
473 return 0;
474}
475
476static void
477wsnode_free (struct wordsplit_node *p)
478{
479 if ((p->flags & (_WSNF_WORD|_WSNF_CONST)) == _WSNF_WORD)
480 free (p->v.word);
481 free (p);
482}
483
484static void
485wsnode_append (struct wordsplit *wsp, struct wordsplit_node *node)
486{
487 node->next = NULL;
488 node->prev = wsp->ws_tail;
489 if (wsp->ws_tail)
490 wsp->ws_tail->next = node;
491 else
492 wsp->ws_head = node;
493 wsp->ws_tail = node;
494}
495
496static void
497wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node)
498{
499 struct wordsplit_node *p;
500
501 p = node->prev;
502 if (p)
503 {
504 p->next = node->next;
505 if (!node->next)
506 p->flags &= ~_WSNF_JOIN;
507 }
508 else
509 wsp->ws_head = node->next;
510
511 p = node->next;
512 if (p)
513 p->prev = node->prev;
514 else
515 wsp->ws_tail = node->prev;
516
517 node->next = node->prev = NULL;
518}
519
520static struct wordsplit_node *
521wsnode_tail (struct wordsplit_node *p)
522{
523 while (p && p->next)
524 p = p->next;
525 return p;
526}
527
528static void
529wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node,
530 struct wordsplit_node *anchor, int before)
531{
532 if (!wsp->ws_head)
533 {
534 node->next = node->prev = NULL;
535 wsp->ws_head = wsp->ws_tail = node;
536 }
537 else if (before)
538 {
539 if (anchor->prev)
540 wsnode_insert (wsp, node, anchor->prev, 0);
541 else
542 {
543 struct wordsplit_node *tail = wsnode_tail (node);
544 node->prev = NULL;
545 tail->next = anchor;
546 anchor->prev = tail;
547 wsp->ws_head = node;
548 }
549 }
550 else
551 {
552 struct wordsplit_node *p;
553 struct wordsplit_node *tail = wsnode_tail (node);
554
555 p = anchor->next;
556 if (p)
557 p->prev = tail;
558 else
559 wsp->ws_tail = tail;
560 tail->next = p;
561 node->prev = anchor;
562 anchor->next = node;
563 }
564}
565
566static int
567wordsplit_add_segm (struct wordsplit *wsp, size_t beg, size_t end, int flg)
568{
569 struct wordsplit_node *node;
570 int rc;
571
572 if (end == beg && !(flg & _WSNF_EMPTYOK))
573 return 0;
574 rc = wsnode_new (wsp, &node);
575 if (rc)
576 return rc;
577 node->flags = flg & ~(_WSNF_WORD | _WSNF_EMPTYOK);
578 node->v.segm.beg = beg;
579 node->v.segm.end = end;
580 wsnode_append (wsp, node);
581 return 0;
582}
583
584static void
585wordsplit_free_nodes (struct wordsplit *wsp)
586{
587 struct wordsplit_node *p;
588
589 for (p = wsp->ws_head; p;)
590 {
591 struct wordsplit_node *next = p->next;
592 wsnode_free (p);
593 p = next;
594 }
595 wsp->ws_head = wsp->ws_tail = NULL;
596}
597
598static void
599wordsplit_dump_nodes (struct wordsplit *wsp)
600{
601 struct wordsplit_node *p;
602 int n = 0;
603
604 for (p = wsp->ws_head, n = 0; p; p = p->next, n++)
605 {
606 if (p->flags & _WSNF_WORD)
607 wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%s;",
608 wsp->ws_lvl,
609 n, p, p->flags, wsnode_flagstr (p->flags), p->v.word);
610 else
611 wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%.*s;",
612 wsp->ws_lvl,
613 n, p, p->flags, wsnode_flagstr (p->flags),
614 (int) (p->v.segm.end - p->v.segm.beg),
615 wsp->ws_input + p->v.segm.beg);
616 }
617}
618
619static int
620coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
621{
622 struct wordsplit_node *p, *end;
623 size_t len = 0;
624 char *buf, *cur;
625 int stop;
626
627 if (!(node->flags & _WSNF_JOIN))
628 return 0;
629
630 for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next)
631 {
632 len += wsnode_len (p);
633 }
634 if (p)
635 len += wsnode_len (p);
636 end = p;
637
638 buf = malloc (len + 1);
639 if (!buf)
640 return _wsplt_nomem (wsp);
641 cur = buf;
642
643 p = node;
644 for (stop = 0; !stop;)
645 {
646 struct wordsplit_node *next = p->next;
647 const char *str = wsnode_ptr (wsp, p);
648 size_t slen = wsnode_len (p);
649
650 memcpy (cur, str, slen);
651 cur += slen;
652 if (p != node)
653 {
654 node->flags |= p->flags & _WSNF_QUOTE;
655 wsnode_remove (wsp, p);
656 stop = p == end;
657 wsnode_free (p);
658 }
659 p = next;
660 }
661
662 *cur = 0;
663
664 node->flags &= ~_WSNF_JOIN;
665
666 if (node->flags & _WSNF_WORD)
667 free (node->v.word);
668 else
669 node->flags |= _WSNF_WORD;
670 node->v.word = buf;
671 return 0;
672}
673
674static void wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
675 char *dst, const char *src,
676 size_t n);
677
678static int
679wsnode_quoteremoval (struct wordsplit *wsp)
680{
681 struct wordsplit_node *p;
682
683 for (p = wsp->ws_head; p; p = p->next)
684 {
685 const char *str = wsnode_ptr (wsp, p);
686 size_t slen = wsnode_len (p);
687 int unquote;
688
689 if (wsp->ws_flags & WRDSF_QUOTE)
690 unquote = !(p->flags & _WSNF_NOEXPAND);
691 else
692 unquote = 0;
693
694 if (unquote)
695 {
696 if (!(p->flags & _WSNF_WORD))
697 {
698 char *newstr = malloc (slen + 1);
699 if (!newstr)
700 return _wsplt_nomem (wsp);
701 memcpy (newstr, str, slen);
702 newstr[slen] = 0;
703 p->v.word = newstr;
704 p->flags |= _WSNF_WORD;
705 }
706
707 wordsplit_string_unquote_copy (wsp, p->flags & _WSNF_QUOTE,
708 p->v.word, str, slen);
709 }
710 }
711 return 0;
712}
713
714static int
715wsnode_coalesce (struct wordsplit *wsp)
716{
717 struct wordsplit_node *p;
718
719 for (p = wsp->ws_head; p; p = p->next)
720 {
721 if (p->flags & _WSNF_JOIN)
722 if (coalesce_segment (wsp, p))
723 return 1;
724 }
725 return 0;
726}
727
728static int
729wsnode_tail_coalesce (struct wordsplit *wsp, struct wordsplit_node *p)
730{
731 if (p->next)
732 {
733 struct wordsplit_node *np = p;
734 while (np && np->next)
735 {
736 np->flags |= _WSNF_JOIN;
737 np = np->next;
738 }
739 if (coalesce_segment (wsp, p))
740 return 1;
741 }
742 return 0;
743}
744
745static size_t skip_delim (struct wordsplit *wsp);
746
747static int
748wordsplit_finish (struct wordsplit *wsp)
749{
750 struct wordsplit_node *p;
751 size_t n;
752 int delim;
753
754 /* Postprocess delimiters. It would be rather simple, if it weren't for
755 the incremental operation.
756
757 Nodes of type _WSNF_DELIM get inserted to the node list if either
758 WRDSF_RETURN_DELIMS flag or WRDSO_MAXWORDS option is set.
759
760 The following cases should be distinguished:
761
762 1. If both WRDSF_SQUEEZE_DELIMS and WRDSF_RETURN_DELIMS are set, compress
763 any runs of similar delimiter nodes to a single node. The nodes are
764 'similar' if they point to the same delimiter character.
765
766 If WRDSO_MAXWORDS option is set, stop compressing when
767 ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into
768 a single last node.
769
770 2. If WRDSO_MAXWORDS option is set, but WRDSF_RETURN_DELIMS is not,
771 remove any delimiter nodes. Stop operation when
772 ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into
773 a single last node.
774
775 3. If incremental operation is in progress, restart the loop any time
776 a delimiter node is about to be returned, unless WRDSF_RETURN_DELIMS
777 is set.
778 */
779 again:
780 delim = 0; /* Delimiter being processed (if any) */
781 n = 0; /* Number of words processed so far */
782 p = wsp->ws_head; /* Current node */
783
784 while (p)
785 {
786 struct wordsplit_node *next = p->next;
787 if (p->flags & _WSNF_DELIM)
788 {
789 if (wsp->ws_flags & WRDSF_RETURN_DELIMS)
790 {
791 if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS)
792 {
793 char const *s = wsnode_ptr (wsp, p);
794 if (delim)
795 {
796 if (delim == *s)
797 {
798 wsnode_remove (wsp, p);
799 p = next;
800 continue;
801 }
802 else
803 {
804 delim = 0;
805 n++; /* Count this node; it will be returned */
806 }
807 }
808 else
809 {
810 delim = *s;
811 p = next;
812 continue;
813 }
814 }
815 }
816 else if (wsp->ws_options & WRDSO_MAXWORDS)
817 {
818 wsnode_remove (wsp, p);
819 p = next;
820 continue;
821 }
822 }
823 else
824 {
825 if (delim)
826 {
827 /* Last node was a delimiter or a compressed run of delimiters;
828 Count it, and clear the delimiter marker */
829 n++;
830 delim = 0;
831 }
832 if (wsp->ws_options & WRDSO_MAXWORDS)
833 {
834 if (wsp->ws_wordi + n + 1 == wsp->ws_maxwords)
835 break;
836 }
837 }
838 n++;
839 if (wsp->ws_flags & WRDSF_INCREMENTAL)
840 p = NULL; /* Break the loop */
841 else
842 p = next;
843 }
844
845 if (p)
846 {
847 /* We're here if WRDSO_MAXWORDS is in effect and wsp->ws_maxwords
848 words have already been collected. Reconstruct a single final
849 node from the remaining nodes. */
850 if (wsnode_tail_coalesce (wsp, p))
851 return wsp->ws_errno;
852 n++;
853 }
854
855 if (n == 0)
856 {
857 /* The loop above have eliminated all nodes. */
858 if (wsp->ws_flags & WRDSF_INCREMENTAL)
859 {
860 /* Restart the processing, if there's any input left. */
861 if (wsp->ws_endp < wsp->ws_len)
862 {
863 int rc;
864 if (wsp->ws_flags & WRDSF_SHOWDBG)
865 wsp->ws_debug (_("Restarting"));
866 rc = wordsplit_process_list (wsp, skip_delim (wsp));
867 if (rc)
868 return rc;
869 }
870 else
871 {
872 wsp->ws_errno = WRDSE_EOF;
873 return WRDSE_EOF;
874 }
875 goto again;
876 }
877
878 if (wsp->ws_flags & WRDSF_NOSPLIT)
879 {
880 if (wordsplit_add_segm (wsp, 0, 0, _WSNF_EMPTYOK))
881 return wsp->ws_errno;
882 n = 1;
883 }
884 }
885
886 if (alloc_space (wsp, n + 1))
887 return wsp->ws_errno;
888
889 while (wsp->ws_head)
890 {
891 const char *str = wsnode_ptr (wsp, wsp->ws_head);
892 size_t slen = wsnode_len (wsp->ws_head);
893 char *newstr = malloc (slen + 1);
894
895 /* Assign newstr first, even if it is NULL. This way
896 wordsplit_free will work even if we return
897 nomem later. */
898 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = newstr;
899 if (!newstr)
900 return _wsplt_nomem (wsp);
901 memcpy (newstr, str, slen);
902 newstr[slen] = 0;
903
904 wsnode_remove (wsp, wsp->ws_head);
905
906 wsp->ws_wordc++;
907 wsp->ws_wordi++;
908
909 if (wsp->ws_flags & WRDSF_INCREMENTAL)
910 break;
911 }
912 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
913 return 0;
914}
915
916int
917wordsplit_append (wordsplit_t *wsp, int argc, char **argv)
918{
919 int rc;
920 size_t i;
921
922 rc = alloc_space (wsp, wsp->ws_wordc + argc + 1);
923 if (rc)
924 return rc;
925 for (i = 0; i < argc; i++)
926 {
927 char *newstr = strdup (argv[i]);
928 if (!newstr)
929 {
930 while (i > 0)
931 {
932 free (wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i - 1]);
933 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i - 1] = NULL;
934 i--;
935 }
936 return _wsplt_nomem (wsp);
937 }
938 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i] = newstr;
939 }
940 wsp->ws_wordc += i;
941 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
942 return 0;
943}
944
945/* Variable expansion */
946static int
947node_split_prefix (struct wordsplit *wsp,
948 struct wordsplit_node **ptail,
949 struct wordsplit_node *node,
950 size_t beg, size_t len, int flg)
951{
952 struct wordsplit_node *newnode;
953
954 if (len == 0)
955 return 0;
956 if (wsnode_new (wsp, &newnode))
957 return 1;
958 wsnode_insert (wsp, newnode, *ptail, 0);
959 if (node->flags & _WSNF_WORD)
960 {
961 const char *str = wsnode_ptr (wsp, node);
962 char *newstr = malloc (len + 1);
963 if (!newstr)
964 return _wsplt_nomem (wsp);
965 memcpy (newstr, str + beg, len);
966 newstr[len] = 0;
967 newnode->flags = _WSNF_WORD;
968 newnode->v.word = newstr;
969 }
970 else
971 {
972 newnode->v.segm.beg = node->v.segm.beg + beg;
973 newnode->v.segm.end = newnode->v.segm.beg + len;
974 }
975 newnode->flags |= flg;
976 *ptail = newnode;
977 return 0;
978}
979
/* Find the parenthesis that closes the one opened before position I in
   the string STR of length LEN.

   PAREN[0] is the opening and PAREN[1] the closing character; nested
   pairs are taken into account.  Parentheses within single- or
   double-quoted text are ignored; within double quotes a backslash
   escapes the character that follows it.  The quote characters take
   precedence over the parentheses.

   On success, stores the offset of the closing parenthesis in *POFF and
   returns 0.  Returns 1 if no matching parenthesis is found. */
static int
find_closing_paren (const char *str, size_t i, size_t len, size_t *poff,
		    char const *paren)
{
  enum { st_init, st_squote, st_dquote } state = st_init;
  size_t level = 1;

  for (; i < len; i++)
    {
      char c = str[i];

      if (state == st_squote)
	{
	  if (c == '\'')
	    state = st_init;
	}
      else if (state == st_dquote)
	{
	  if (c == '\\')
	    i++;		/* Skip the escaped character */
	  else if (c == '"')
	    state = st_init;
	}
      else if (c == '"')
	state = st_dquote;
      else if (c == '\'')
	state = st_squote;
      else if (c == paren[0])
	level++;
      else if (c == paren[1] && --level == 0)
	{
	  *poff = i;
	  return 0;
	}
    }
  return 1;
}
1036
1037static char const *
1038wsplt_env_find (struct wordsplit *wsp, const char *name, size_t len)
1039{
1040 size_t i;
1041
1042 if (!wsp->ws_env)
1043 return NULL;
1044 if (wsp->ws_flags & WRDSF_ENV_KV)
1045 {
1046 /* A key-value pair environment */
1047 for (i = 0; wsp->ws_env[i]; i++)
1048 {
1049 size_t elen = strlen (wsp->ws_env[i]);
1050 if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0)
1051 return wsp->ws_env[i + 1];
1052 /* Skip the value. Break the loop if it is NULL. */
1053 i++;
1054 if (wsp->ws_env[i] == NULL)
1055 break;
1056 }
1057 }
1058 else
1059 {
1060 /* Usual (A=B) environment. */
1061 for (i = 0; wsp->ws_env[i]; i++)
1062 {
1063 size_t j;
1064 const char *var = wsp->ws_env[i];
1065
1066 for (j = 0; j < len; j++)
1067 if (name[j] != var[j])
1068 break;
1069 if (j == len && var[j] == '=')
1070 return var + j + 1;
1071 }
1072 }
1073 return NULL;
1074}
1075
1076static int
1077wsplt_env_lookup (struct wordsplit *wsp, const char *name, size_t len,
1078 char **ret)
1079{
1080 if (wsp->ws_flags & WRDSF_ENV)
1081 {
1082 char const *val = wsplt_env_find (wsp, name, len);
1083 if (val)
1084 {
1085 char *retval = strdup (val);
1086 if (!retval)
1087 return WRDSE_NOSPACE;
1088 *ret = retval;
1089 return WRDSE_OK;
1090 }
1091 }
1092 return WRDSE_UNDEF;
1093}
1094
1095static int
1096wsplt_env_getvar (struct wordsplit *wsp, const char *name, size_t len,
1097 char **ret)
1098{
1099 return wsp->ws_getvar (ret, name, len, wsp->ws_closure);
1100}
1101
1102static int
1103wsplt_assign_var (struct wordsplit *wsp, const char *name, size_t namelen,
1104 char const *value)
1105{
1106 int n = (wsp->ws_flags & WRDSF_ENV_KV) ? 2 : 1;
1107 char *v;
1108
1109 if (wsp->ws_envidx + n >= wsp->ws_envsiz)
1110 {
1111 size_t sz;
1112 char **newenv;
1113
1114 if (!wsp->ws_envbuf)
1115 {
1116 if (wsp->ws_flags & WRDSF_ENV)
1117 {
1118 size_t i = 0, j;
1119
1120 if (wsp->ws_env)
1121 {
1122 for (; wsp->ws_env[i]; i++)
1123 ;
1124 }
1125
1126 sz = i + n + 1;
1127
1128 newenv = calloc (sz, sizeof(newenv[0]));
1129 if (!newenv)
1130 return _wsplt_nomem (wsp);
1131
1132 for (j = 0; j < i; j++)
1133 {
1134 newenv[j] = strdup (wsp->ws_env[j]);
1135 if (!newenv[j])
1136 {
1137 for (; j > 1; j--)
1138 free (newenv[j-1]);
1139 free (newenv);
1140 return _wsplt_nomem (wsp);
1141 }
1142 }
1143 newenv[j] = NULL;
1144
1145 wsp->ws_envbuf = newenv;
1146 wsp->ws_envidx = i;
1147 wsp->ws_envsiz = sz;
1148 wsp->ws_env = (const char**) wsp->ws_envbuf;
1149 }
1150 else
1151 {
1152 newenv = calloc (WORDSPLIT_ENV_INIT, sizeof(newenv[0]));
1153 if (!newenv)
1154 return _wsplt_nomem (wsp);
1155 wsp->ws_envbuf = newenv;
1156 wsp->ws_envidx = 0;
1157 wsp->ws_envsiz = WORDSPLIT_ENV_INIT;
1158 wsp->ws_env = (const char**) wsp->ws_envbuf;
1159 wsp->ws_flags |= WRDSF_ENV;
1160 }
1161 }
1162 else
1163 {
1164 size_t n = wsp->ws_envsiz;
1165
1166 if ((size_t) -1 / 3 * 2 / sizeof (wsp->ws_envbuf[0]) <= n)
1167 return _wsplt_nomem (wsp);
1168 n += (n + 1) / 2;
1169 newenv = realloc (wsp->ws_envbuf, n * sizeof (wsp->ws_envbuf[0]));
1170 if (!newenv)
1171 return _wsplt_nomem (wsp);
1172 wsp->ws_envbuf = newenv;
1173 wsp->ws_envsiz = n;
1174 wsp->ws_env = (const char**) wsp->ws_envbuf;
1175 }
1176 }
1177
1178 if (wsp->ws_flags & WRDSF_ENV_KV)
1179 {
1180 /* A key-value pair environment */
1181 char *p = malloc (namelen + 1);
1182 if (!p)
1183 return _wsplt_nomem (wsp);
1184 memcpy (p, name, namelen);
1185 p[namelen] = 0;
1186
1187 v = strdup (value);
1188 if (!v)
1189 {
1190 free (p);
1191 return _wsplt_nomem (wsp);
1192 }
1193 wsp->ws_env[wsp->ws_envidx++] = p;
1194 wsp->ws_env[wsp->ws_envidx++] = v;
1195 }
1196 else
1197 {
1198 v = malloc (namelen + strlen(value) + 2);
1199 if (!v)
1200 return _wsplt_nomem (wsp);
1201 memcpy (v, name, namelen);
1202 v[namelen++] = '=';
1203 strcpy(v + namelen, value);
1204 wsp->ws_env[wsp->ws_envidx++] = v;
1205 }
1206 wsp->ws_env[wsp->ws_envidx] = NULL;
1207 return WRDSE_OK;
1208}
1209
1210int
1211wsplt_assign_param (struct wordsplit *wsp, int param_idx, char *value)
1212{
1213 char *v;
1214
1215 if (param_idx < 0)
1216 return _wsplt_seterr (wsp, WRDSE_BADPARAM);
1217 if (param_idx == wsp->ws_paramc)
1218 {
1219 char **parambuf;
1220 if (!wsp->ws_parambuf)
1221 {
1222 size_t i;
1223
1224 parambuf = calloc ((size_t)param_idx + 1, sizeof (parambuf[0]));
1225 if (!parambuf)
1226 return _wsplt_nomem (wsp);
1227
1228 for (i = 0; i < wsp->ws_paramc; i++)
1229 {
1230 parambuf[i] = strdup (wsp->ws_paramv[i]);
1231 if (!parambuf[i])
1232 {
1233 for (; i > 1; i--)
1234 free (parambuf[i-1]);
1235 free (parambuf);
1236 return _wsplt_nomem (wsp);
1237 }
1238 }
1239
1240 wsp->ws_parambuf = parambuf;
1241 wsp->ws_paramidx = param_idx;
1242 wsp->ws_paramsiz = param_idx + 1;
1243 }
1244 else
1245 {
1246 size_t n = wsp->ws_paramsiz;
1247
1248 if ((size_t) -1 / 3 * 2 / sizeof (wsp->ws_parambuf[0]) <= n)
1249 return _wsplt_nomem (wsp);
1250 n += (n + 1) / 2;
1251 parambuf = realloc (wsp->ws_parambuf, n * sizeof (wsp->ws_parambuf[0]));
1252 if (!parambuf)
1253 return _wsplt_nomem (wsp);
1254 wsp->ws_parambuf = parambuf;
1255 wsp->ws_paramsiz = n;
1256 wsp->ws_parambuf[param_idx] = NULL;
1257 }
1258
1259 wsp->ws_paramv = (const char**) wsp->ws_parambuf;
1260 wsp->ws_paramc = param_idx + 1;
1261 }
1262 else if (param_idx > wsp->ws_paramc)
1263 return _wsplt_seterr (wsp, WRDSE_BADPARAM);
1264
1265 v = strdup (value);
1266 if (!v)
1267 return _wsplt_nomem (wsp);
1268
1269 free (wsp->ws_parambuf[param_idx]);
1270 wsp->ws_parambuf[param_idx] = v;
1271 return WRDSE_OK;
1272}
1273
1274/* Recover from what looked like a variable reference, but turned out
1275 not to be one. STR points to first character after '$'. */
1276static int
1277expvar_recover (struct wordsplit *wsp, const char *str,
1278 struct wordsplit_node **ptail, const char **pend, int flg)
1279{
1280 struct wordsplit_node *newnode;
1281
1282 if (wsnode_new (wsp, &newnode))
1283 return 1;
1284 wsnode_insert (wsp, newnode, *ptail, 0);
1285 *ptail = newnode;
1286 newnode->flags = _WSNF_WORD | flg;
1287 newnode->v.word = malloc (3);
1288 if (!newnode->v.word)
1289 return _wsplt_nomem (wsp);
1290 newnode->v.word[0] = '$';
1291 newnode->v.word[1] = str[0];
1292 newnode->v.word[2] = 0;
1293 *pend = str;
1294 return 0;
1295}
1296
1297static int
1298expand_paramv (struct wordsplit *wsp, struct wordsplit_node **ptail, int flg,
1299 int q)
1300{
1301 struct wordsplit ws;
1302 int wsflags = WRDSF_NOVAR | WRDSF_NOCMD | WRDSF_QUOTE
1303 | (WSP_RETURN_DELIMS (wsp) ? WRDSF_RETURN_DELIMS : 0)
1304 | (q ? WRDSF_NOSPLIT : 0);
1305 size_t i;
1306 struct wordsplit_node *tail = *ptail;
1307
1308 for (i = 0; i < wsp->ws_paramc; i++)
1309 {
1310 struct wordsplit_node *np;
1311 int rc = _wsplt_subsplit (wsp, &ws,
1312 wsp->ws_paramv[i], strlen (wsp->ws_paramv[i]),
1313 wsflags, q);
1314 if (rc)
1315 {
1316 _wsplt_seterr_sub (wsp, &ws);
1317 wordsplit_free (&ws);
1318 return 1;
1319 }
1320
1321 if (q)
1322 {
1323 if (wsnode_new (wsp, &np))
1324 return 1;
1325 wsnode_insert (wsp, np, *ptail, 0);
1326 *ptail = np;
1327 np->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
1328 np->v.word = ws.ws_wordv[0];
1329
1330 ws.ws_wordv[0] = NULL;
1331 }
1332 else
1333 {
1334 for (np = ws.ws_head; np; np = np->next)
1335 np->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
1336 wsnode_insert (wsp, ws.ws_head, *ptail, 0);
1337 *ptail = ws.ws_tail;
1338 ws.ws_head = ws.ws_tail = NULL;
1339 }
1340
1341 wsflags |= WRDSF_REUSE;
1342 }
1343 if (wsflags & WRDSF_REUSE)
1344 wordsplit_free (&ws);
1345
1346 if (flg & _WSNF_QUOTE)
1347 {
1348 tail = tail->next;
1349 /* Insert delimiters, mark nodes as joinable */
1350 while (tail != *ptail)
1351 {
1352 struct wordsplit_node *next = tail->next;
1353 struct wordsplit_node *newnode;
1354
1355 tail->flags |= _WSNF_JOIN;
1356
1357 if (wsnode_new (wsp, &newnode))
1358 return 1;
1359 newnode->flags = _WSNF_WORD | _WSNF_CONST | _WSNF_NOEXPAND | _WSNF_JOIN;
1360 newnode->v.word = wsp->ws_sep;
1361
1362 wsnode_insert (wsp, newnode, tail, 0);
1363 tail = next;
1364 }
1365 }
1366
1367 return 0;
1368}
1369
1370static int
1371expvar (struct wordsplit *wsp, const char *str, size_t len,
1372 struct wordsplit_node **ptail, const char **pend, int flg)
1373{
1374 size_t i = 0;
1375 const char *defstr = NULL;
1376 char *value;
1377 struct wordsplit_node *newnode;
1378 const char *start = str - 1;
1379 int rc;
1380 struct wordsplit ws;
1381 int is_param = 0;
1382 long param_idx = 0;
1383
1384 if (ISVARBEG (str[0]))
1385 {
1386 for (i = 1; i < len; i++)
1387 if (!ISVARCHR (str[i]))
1388 break;
1389 *pend = str + i - 1;
1390 }
1391 else if ((wsp->ws_options & WRDSO_PARAMV) && ISDIGIT (str[0]))
1392 {
1393 i = 1;
1394 *pend = str;
1395 is_param = 1;
1396 param_idx = to_num (str[0]);
1397 }
1398 else if ((wsp->ws_options & WRDSO_PARAMV) && str[0] == '#')
1399 {
1400 char b[16];
1401 snprintf (b, sizeof(b), "%d", (int) wsp->ws_paramc);
1402 value = strdup (b);
1403 if (!value)
1404 return _wsplt_nomem (wsp);
1405 if (wsnode_new (wsp, &newnode))
1406 return 1;
1407 wsnode_insert (wsp, newnode, *ptail, 0);
1408 *ptail = newnode;
1409 newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
1410 newnode->v.word = value;
1411 return 0;
1412 }
1413 else if ((wsp->ws_options & WRDSO_PARAMV) && str[0] == '*')
1414 {
1415 return expand_paramv (wsp, ptail, flg, 0);
1416 }
1417 else if ((wsp->ws_options & WRDSO_PARAMV) && str[0] == '@')
1418 {
1419 return expand_paramv (wsp, ptail, flg, 1);
1420 }
1421 else if (str[0] == '{'
1422 && (ISVARBEG (str[1])
1423 || (is_param = (((wsp->ws_options & WRDSO_PARAMV)
1424 && ISDIGIT (str[1]))
1425 || ((wsp->ws_options & WRDSO_PARAM_NEGIDX)
1426 && (str[1] == '-'
1427 && ISDIGIT (str[2]))))) != 0))
1428 {
1429 str++;
1430 len--;
1431 for (i = str[0] == '-' ? 1 : 0; i < len; i++)
1432 {
1433 if (str[i] == ':')
1434 {
1435 size_t j;
1436
1437 defstr = str + i + 1;
1438 if (find_closing_paren (str, i + 1, len, &j, "{}"))
1439 return _wsplt_seterr (wsp, WRDSE_CBRACE);
1440 *pend = str + j;
1441 break;
1442 }
1443 else if (str[i] == '}')
1444 {
1445 defstr = NULL;
1446 *pend = str + i;
1447 break;
1448 }
1449 else if (strchr ("-+?=", str[i]))
1450 {
1451 size_t j;
1452
1453 defstr = str + i;
1454 if (find_closing_paren (str, i, len, &j, "{}"))
1455 return _wsplt_seterr (wsp, WRDSE_CBRACE);
1456 *pend = str + j;
1457 break;
1458 }
1459 else if (is_param)
1460 {
1461 if (ISDIGIT (str[i]))
1462 {
1463 param_idx = param_idx * 10 + to_num (str[i]);
1464 if ((str[0] == '-' && -param_idx < INT_MIN)
1465 || param_idx > INT_MAX)
1466 return expvar_recover (wsp, str - 1, ptail, pend, flg);
1467 }
1468 else
1469 {
1470 return expvar_recover (wsp, str - 1, ptail, pend, flg);
1471 }
1472 }
1473 else if (!ISVARCHR (str[i]))
1474 {
1475 return expvar_recover (wsp, str - 1, ptail, pend, flg);
1476 }
1477 }
1478
1479 if (is_param && str[0] == '-')
1480 param_idx = wsp->ws_paramc - param_idx;
1481
1482 if (i == len)
1483 return _wsplt_seterr (wsp, WRDSE_CBRACE);
1484 }
1485 else
1486 {
1487 return expvar_recover (wsp, str, ptail, pend, flg);
1488 }
1489
1490 /* Actually expand the variable */
1491 /* str - start of the variable name
1492 i - its length
1493 defstr - default replacement str */
1494
1495 if (defstr && strchr("-+?=", defstr[0]) == 0)
1496 {
1497 rc = WRDSE_UNDEF;
1498 defstr = NULL;
1499 }
1500 else
1501 {
1502 if (is_param)
1503 {
1504 if (param_idx >= 0 && param_idx < wsp->ws_paramc)
1505 {
1506 value = strdup (wsp->ws_paramv[param_idx]);
1507 if (!value)
1508 rc = WRDSE_NOSPACE;
1509 else
1510 rc = WRDSE_OK;
1511 }
1512 else
1513 rc = WRDSE_UNDEF;
1514 }
1515 else
1516 {
1517 if (wsp->ws_flags & WRDSF_GETVAR)
1518 {
1519 if (wsp->ws_options & WRDSO_GETVARPREF)
1520 {
1521 rc = wsplt_env_getvar (wsp, str, i, &value);
1522 if (rc == WRDSE_UNDEF)
1523 rc = wsplt_env_lookup (wsp, str, i, &value);
1524 }
1525 else
1526 {
1527 rc = wsplt_env_lookup (wsp, str, i, &value);
1528 if (rc == WRDSE_UNDEF)
1529 rc = wsplt_env_getvar (wsp, str, i, &value);
1530 }
1531 }
1532 else
1533 rc = wsplt_env_lookup (wsp, str, i, &value);
1534 }
1535
1536 if (rc == WRDSE_OK
1537 && (!value || value[0] == 0)
1538 && defstr && defstr[-1] == ':')
1539 {
1540 free (value);
1541 rc = WRDSE_UNDEF;
1542 }
1543 }
1544
1545 switch (rc)
1546 {
1547 case WRDSE_OK:
1548 if (defstr && *defstr == '+')
1549 {
1550 size_t size = *pend - ++defstr;
1551
1552 rc = _wsplt_subsplit (wsp, &ws, defstr, size,
1553 WRDSF_NOSPLIT | WRDSF_WS | WRDSF_QUOTE |
1554 (wsp->ws_flags &
1555 (WRDSF_NOVAR | WRDSF_NOCMD)), 1);
1556 if (rc)
1557 return rc;
1558 free (value);
1559 value = ws.ws_wordv[0];
1560 ws.ws_wordv[0] = NULL;
1561 wordsplit_free (&ws);
1562 }
1563 break;
1564
1565 case WRDSE_UNDEF:
1566 if (defstr)
1567 {
1568 size_t size;
1569 if (*defstr == '-' || *defstr == '=')
1570 {
1571 size = *pend - ++defstr;
1572
1573 rc = _wsplt_subsplit (wsp, &ws, defstr, size,
1574 WRDSF_NOSPLIT | WRDSF_WS | WRDSF_QUOTE |
1575 (wsp->ws_flags &
1576 (WRDSF_NOVAR | WRDSF_NOCMD)),
1577 1);
1578 if (rc)
1579 return rc;
1580
1581 value = ws.ws_wordv[0];
1582 ws.ws_wordv[0] = NULL;
1583 wordsplit_free (&ws);
1584
1585 if (defstr[-1] == '=')
1586 {
1587 if (is_param)
1588 rc = wsplt_assign_param (wsp, param_idx, value);
1589 else
1590 rc = wsplt_assign_var (wsp, str, i, value);
1591 }
1592 if (rc)
1593 {
1594 free (value);
1595 return rc;
1596 }
1597 }
1598 else
1599 {
1600 if (*defstr == '?')
1601 {
1602 size = *pend - ++defstr;
1603 if (size == 0)
1604 wsp->ws_error (_("%.*s: variable null or not set"),
1605 (int) i, str);
1606 else
1607 {
1608 rc = _wsplt_subsplit (wsp, &ws, defstr, size,
1609 WRDSF_NOSPLIT | WRDSF_WS |
1610 WRDSF_QUOTE |
1611 (wsp->ws_flags &
1612 (WRDSF_NOVAR | WRDSF_NOCMD)),
1613 1);
1614 if (rc == 0)
1615 wsp->ws_error ("%.*s: %s",
1616 (int) i, str, ws.ws_wordv[0]);
1617 else
1618 wsp->ws_error ("%.*s: %.*s",
1619 (int) i, str, (int) size, defstr);
1620 wordsplit_free (&ws);
1621 }
1622 }
1623 value = NULL;
1624 }
1625 }
1626 else if (wsp->ws_flags & WRDSF_UNDEF)
1627 {
1628 _wsplt_setctxerr (wsp, WRDSE_UNDEF, str, *pend - str + 1);
1629 return 1;
1630 }
1631 else
1632 {
1633 if (wsp->ws_flags & WRDSF_WARNUNDEF)
1634 wsp->ws_error (_("warning: undefined variable `%.*s'"),
1635 (int) i, str);
1636 if (wsp->ws_flags & WRDSF_KEEPUNDEF)
1637 value = NULL;
1638 else
1639 {
1640 value = strdup ("");
1641 if (!value)
1642 return _wsplt_nomem (wsp);
1643 }
1644 }
1645 break;
1646
1647 case WRDSE_NOSPACE:
1648 return _wsplt_nomem (wsp);
1649
1650 case WRDSE_USERERR:
1651 if (wsp->ws_errno == WRDSE_USERERR)
1652 free (wsp->ws_usererr);
1653 wsp->ws_usererr = value;
1654 /* fall through */
1655 default:
1656 _wsplt_seterr (wsp, rc);
1657 return 1;
1658 }
1659
1660 if (value)
1661 {
1662 if (flg & _WSNF_QUOTE)
1663 {
1664 if (wsnode_new (wsp, &newnode))
1665 return 1;
1666 wsnode_insert (wsp, newnode, *ptail, 0);
1667 *ptail = newnode;
1668 newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
1669 newnode->v.word = value;
1670 }
1671 else if (*value == 0)
1672 {
1673 free (value);
1674 /* Empty string is a special case */
1675 if (wsnode_new (wsp, &newnode))
1676 return 1;
1677 wsnode_insert (wsp, newnode, *ptail, 0);
1678 *ptail = newnode;
1679 newnode->flags = _WSNF_NULL;
1680 }
1681 else
1682 {
1683 struct wordsplit ws;
1684 int rc;
1685
1686 rc = _wsplt_subsplit (wsp, &ws, value, strlen (value),
1687 WRDSF_NOVAR | WRDSF_NOCMD |
1688 WRDSF_QUOTE
1689 | (WSP_RETURN_DELIMS (wsp) ? WRDSF_RETURN_DELIMS : 0) ,
1690 0);
1691 free (value);
1692 if (rc)
1693 {
1694 _wsplt_seterr_sub (wsp, &ws);
1695 wordsplit_free (&ws);
1696 return 1;
1697 }
1698 wsnode_insert (wsp, ws.ws_head, *ptail, 0);
1699 *ptail = ws.ws_tail;
1700 ws.ws_head = ws.ws_tail = NULL;
1701 wordsplit_free (&ws);
1702 }
1703 }
1704 else if (wsp->ws_flags & WRDSF_KEEPUNDEF)
1705 {
1706 size_t size = *pend - start + 1;
1707
1708 if (wsnode_new (wsp, &newnode))
1709 return 1;
1710 wsnode_insert (wsp, newnode, *ptail, 0);
1711 *ptail = newnode;
1712 newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
1713 newnode->v.word = malloc (size + 1);
1714 if (!newnode->v.word)
1715 return _wsplt_nomem (wsp);
1716 memcpy (newnode->v.word, start, size);
1717 newnode->v.word[size] = 0;
1718 }
1719 else
1720 {
1721 if (wsnode_new (wsp, &newnode))
1722 return 1;
1723 wsnode_insert (wsp, newnode, *ptail, 0);
1724 *ptail = newnode;
1725 newnode->flags = _WSNF_NULL;
1726 }
1727 return 0;
1728}
1729
/* Return non-zero if C, the character following a '$', can begin a
   variable reference: '{', '#', '@', '*', a variable-name start
   character or a decimal digit. */
static int
begin_var_p (int c)
{
  if (memchr ("{#@*", c, 4) != NULL)
    return 1;
  if (ISVARBEG (c))
    return 1;
  return ISDIGIT (c) ? 1 : 0;
}
1735
1736static int
1737node_expand (struct wordsplit *wsp, struct wordsplit_node *node,
1738 int (*beg_p) (int),
1739 int (*ws_exp_fn) (struct wordsplit *wsp,
1740 const char *str, size_t len,
1741 struct wordsplit_node **ptail,
1742 const char **pend,
1743 int flg))
1744{
1745 const char *str = wsnode_ptr (wsp, node);
1746 size_t slen = wsnode_len (node);
1747 const char *end = str + slen;
1748 const char *p;
1749 size_t off = 0;
1750 struct wordsplit_node *tail = node;
1751
1752 for (p = str; p < end; p++)
1753 {
1754 if (*p == '\\')
1755 {
1756 p++;
1757 continue;
1758 }
1759 if (*p == '$' && beg_p (p[1]))
1760 {
1761 size_t n = p - str;
1762
1763 if (tail != node)
1764 tail->flags |= _WSNF_JOIN;
1765 if (node_split_prefix (wsp, &tail, node, off, n, _WSNF_JOIN))
1766 return 1;
1767 p++;
1768 if (ws_exp_fn (wsp, p, slen - n, &tail, &p,
1769 node->flags & (_WSNF_JOIN | _WSNF_QUOTE)))
1770 return 1;
1771 off += p - str + 1;
1772 str = p + 1;
1773 }
1774 }
1775 if (p > str)
1776 {
1777 if (tail != node)
1778 tail->flags |= _WSNF_JOIN;
1779 if (node_split_prefix (wsp, &tail, node, off, p - str,
1780 node->flags & (_WSNF_JOIN|_WSNF_QUOTE)))
1781 return 1;
1782 }
1783 if (tail != node)
1784 {
1785 wsnode_remove (wsp, node);
1786 wsnode_free (node);
1787 }
1788 return 0;
1789}
1790
1791/* Remove NULL nodes from the list */
1792static void
1793wsnode_nullelim (struct wordsplit *wsp)
1794{
1795 struct wordsplit_node *p;
1796
1797 for (p = wsp->ws_head; p;)
1798 {
1799 struct wordsplit_node *next = p->next;
1800 if (p->flags & _WSNF_DELIM && p->prev)
1801 p->prev->flags &= ~_WSNF_JOIN;
1802 if (p->flags & _WSNF_NULL)
1803 {
1804 wsnode_remove (wsp, p);
1805 wsnode_free (p);
1806 }
1807 p = next;
1808 }
1809}
1810
1811static int
1812wordsplit_varexp (struct wordsplit *wsp)
1813{
1814 struct wordsplit_node *p;
1815
1816 for (p = wsp->ws_head; p;)
1817 {
1818 struct wordsplit_node *next = p->next;
1819 if (!(p->flags & (_WSNF_NOEXPAND|_WSNF_DELIM)))
1820 if (node_expand (wsp, p, begin_var_p, expvar))
1821 return 1;
1822 p = next;
1823 }
1824
1825 wsnode_nullelim (wsp);
1826 return 0;
1827}
1828
/* Return 1 if C, the character following a '$', opens a command
   substitution, i.e. is a left parenthesis; 0 otherwise. */
static int
begin_cmd_p (int c)
{
  if (c != '(')
    return 0;
  return 1;
}
1834
1835static int
1836expcmd (struct wordsplit *wsp, const char *str, size_t len,
1837 struct wordsplit_node **ptail, const char **pend, int flg)
1838{
1839 int rc;
1840 size_t j;
1841 char *value;
1842 struct wordsplit_node *newnode;
1843 struct wordsplit ws;
1844
1845 str++;
1846 len--;
1847
1848 if (find_closing_paren (str, 0, len, &j, "()"))
1849 {
1850 _wsplt_seterr (wsp, WRDSE_PAREN);
1851 return 1;
1852 }
1853
1854 *pend = str + j;
1855 rc = _wsplt_subsplit (wsp, &ws, str, j, WRDSF_WS | WRDSF_QUOTE, 1);
1856 if (rc)
1857 {
1858 _wsplt_seterr_sub (wsp, &ws);
1859 wordsplit_free (&ws);
1860 return 1;
1861 }
1862 rc = wsp->ws_command (&value, str, j, ws.ws_wordv, wsp->ws_closure);
1863 wordsplit_free (&ws);
1864
1865 if (rc == WRDSE_NOSPACE)
1866 return _wsplt_nomem (wsp);
1867 else if (rc)
1868 {
1869 if (rc == WRDSE_USERERR)
1870 {
1871 if (wsp->ws_errno == WRDSE_USERERR)
1872 free (wsp->ws_usererr);
1873 wsp->ws_usererr = value;
1874 }
1875 _wsplt_seterr (wsp, rc);
1876 return 1;
1877 }
1878
1879 if (value)
1880 {
1881 if (flg & _WSNF_QUOTE)
1882 {
1883 if (wsnode_new (wsp, &newnode))
1884 return 1;
1885 wsnode_insert (wsp, newnode, *ptail, 0);
1886 *ptail = newnode;
1887 newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
1888 newnode->v.word = value;
1889 }
1890 else if (*value == 0)
1891 {
1892 free (value);
1893 /* Empty string is a special case */
1894 if (wsnode_new (wsp, &newnode))
1895 return 1;
1896 wsnode_insert (wsp, newnode, *ptail, 0);
1897 *ptail = newnode;
1898 newnode->flags = _WSNF_NULL;
1899 }
1900 else
1901 {
1902 struct wordsplit ws;
1903 int rc;
1904
1905 rc = _wsplt_subsplit (wsp, &ws, value, strlen (value),
1906 WRDSF_NOVAR | WRDSF_NOCMD
1907 | WRDSF_WS | WRDSF_QUOTE
1908 | (WSP_RETURN_DELIMS (wsp) ? WRDSF_RETURN_DELIMS : 0),
1909 0);
1910 free (value);
1911 if (rc)
1912 {
1913 _wsplt_seterr_sub (wsp, &ws);
1914 wordsplit_free (&ws);
1915 return 1;
1916 }
1917 wsnode_insert (wsp, ws.ws_head, *ptail, 0);
1918 *ptail = ws.ws_tail;
1919 ws.ws_head = ws.ws_tail = NULL;
1920 wordsplit_free (&ws);
1921 }
1922 }
1923 else
1924 {
1925 if (wsnode_new (wsp, &newnode))
1926 return 1;
1927 wsnode_insert (wsp, newnode, *ptail, 0);
1928 *ptail = newnode;
1929 newnode->flags = _WSNF_NULL;
1930 }
1931 return 0;
1932}
1933
1934static int
1935wordsplit_cmdexp (struct wordsplit *wsp)
1936{
1937 struct wordsplit_node *p;
1938
1939 for (p = wsp->ws_head; p;)
1940 {
1941 struct wordsplit_node *next = p->next;
1942 if (!(p->flags & _WSNF_NOEXPAND))
1943 if (node_expand (wsp, p, begin_cmd_p, expcmd))
1944 return 1;
1945 p = next;
1946 }
1947
1948 wsnode_nullelim (wsp);
1949 return 0;
1950}
1951
1952/* Strip off any leading and trailing whitespace. This function is called
1953 right after the initial scanning, therefore it assumes that every
1954 node in the list is a text reference node. */
1955static int
1956wordsplit_trimws (struct wordsplit *wsp)
1957{
1958 struct wordsplit_node *p;
1959
1960 for (p = wsp->ws_head; p; p = p->next)
1961 {
1962 size_t n;
1963
1964 if (!(p->flags & _WSNF_QUOTE))
1965 {
1966 /* Skip leading whitespace: */
1967 for (n = p->v.segm.beg; n < p->v.segm.end && ISWS (wsp->ws_input[n]);
1968 n++)
1969 ;
1970 p->v.segm.beg = n;
1971 }
1972
1973 while (p->next && (p->flags & _WSNF_JOIN))
1974 p = p->next;
1975
1976 if (p->flags & _WSNF_QUOTE)
1977 continue;
1978
1979 /* Trim trailing whitespace */
1980 for (n = p->v.segm.end;
1981 n > p->v.segm.beg && ISWS (wsp->ws_input[n - 1]); n--);
1982 p->v.segm.end = n;
1983 if (p->v.segm.beg == p->v.segm.end)
1984 p->flags |= _WSNF_NULL;
1985 }
1986
1987 wsnode_nullelim (wsp);
1988 return 0;
1989}
1990
1991static int
1992wordsplit_tildexpand (struct wordsplit *wsp)
1993{
1994 struct wordsplit_node *p;
1995 char *uname = NULL;
1996 size_t usize = 0;
1997
1998 for (p = wsp->ws_head; p; p = p->next)
1999 {
2000 const char *str;
2001
2002 if (p->flags & _WSNF_QUOTE)
2003 continue;
2004
2005 str = wsnode_ptr (wsp, p);
2006 if (str[0] == '~')
2007 {
2008 size_t i, size, dlen;
2009 size_t slen = wsnode_len (p);
2010 struct passwd *pw;
2011 char *newstr;
2012
2013 for (i = 1; i < slen && str[i] != '/'; i++)
2014 ;
2015 if (i == slen)
2016 continue;
2017 if (i > 1)
2018 {
2019 if (i > usize)
2020 {
2021 char *p = realloc (uname, i);
2022 if (!p)
2023 {
2024 free (uname);
2025 return _wsplt_nomem (wsp);
2026 }
2027 uname = p;
2028 usize = i;
2029 }
2030 --i;
2031 memcpy (uname, str + 1, i);
2032 uname[i] = 0;
2033 pw = getpwnam (uname);
2034 }
2035 else
2036 pw = getpwuid (getuid ());
2037
2038 if (!pw)
2039 continue;
2040
2041 dlen = strlen (pw->pw_dir);
2042 size = slen - i + dlen;
2043 newstr = malloc (size);
2044 if (!newstr)
2045 {
2046 free (uname);
2047 return _wsplt_nomem (wsp);
2048 }
2049 --size;
2050
2051 memcpy (newstr, pw->pw_dir, dlen);
2052 memcpy (newstr + dlen, str + i + 1, slen - i - 1);
2053 newstr[size] = 0;
2054 if (p->flags & _WSNF_WORD)
2055 free (p->v.word);
2056 p->v.word = newstr;
2057 p->flags |= _WSNF_WORD;
2058 }
2059 }
2060 free (uname);
2061 return 0;
2062}
2063
/* Return 1 if any of the first L bytes of S is a globbing
   metacharacter ('*', '?' or '['), 0 otherwise.  A non-positive L
   yields 0. */
static int
isglob (const char *s, int l)
{
  while (l-- > 0)
    {
      /* memchr over exactly three bytes: unlike strchr it does not
         match the set's terminating NUL, so a NUL byte in S is not
         reported as a metacharacter. */
      if (memchr ("*?[", *s++, 3))
        return 1;
    }
  return 0;
}
2074
2075static int
2076wordsplit_pathexpand (struct wordsplit *wsp)
2077{
2078 struct wordsplit_node *p, *next;
2079 char *pattern = NULL;
2080 size_t patsize = 0;
2081 size_t slen;
2082 int flags = 0;
2083
2084#ifdef GLOB_PERIOD
2085 if (wsp->ws_options & WRDSO_DOTGLOB)
2086 flags = GLOB_PERIOD;
2087#endif
2088
2089 for (p = wsp->ws_head; p; p = next)
2090 {
2091 const char *str;
2092
2093 next = p->next;
2094
2095 if (p->flags & _WSNF_QUOTE)
2096 continue;
2097
2098 str = wsnode_ptr (wsp, p);
2099 slen = wsnode_len (p);
2100
2101 if (isglob (str, slen))
2102 {
2103 int i;
2104 glob_t g;
2105 struct wordsplit_node *prev;
2106
2107 if (slen + 1 > patsize)
2108 {
2109 char *p = realloc (pattern, slen + 1);
2110 if (!p)
2111 return _wsplt_nomem (wsp);
2112 pattern = p;
2113 patsize = slen + 1;
2114 }
2115 memcpy (pattern, str, slen);
2116 pattern[slen] = 0;
2117
2118 switch (glob (pattern, flags, NULL, &g))
2119 {
2120 case 0:
2121 break;
2122
2123 case GLOB_NOSPACE:
2124 free (pattern);
2125 return _wsplt_nomem (wsp);
2126
2127 case GLOB_NOMATCH:
2128 if (wsp->ws_options & WRDSO_NULLGLOB)
2129 {
2130 wsnode_remove (wsp, p);
2131 wsnode_free (p);
2132 }
2133 else if (wsp->ws_options & WRDSO_FAILGLOB)
2134 {
2135 char buf[128];
2136 if (wsp->ws_errno == WRDSE_USERERR)
2137 free (wsp->ws_usererr);
2138 snprintf (buf, sizeof (buf), _("no files match pattern %s"),
2139 pattern);
2140 free (pattern);
2141 wsp->ws_usererr = strdup (buf);
2142 if (!wsp->ws_usererr)
2143 return _wsplt_nomem (wsp);
2144 else
2145 return _wsplt_seterr (wsp, WRDSE_USERERR);
2146 }
2147 continue;
2148
2149 default:
2150 free (pattern);
2151 return _wsplt_setctxerr (wsp, WRDSE_GLOBERR, pattern, slen);
2152 }
2153
2154 prev = p;
2155 for (i = 0; i < g.gl_pathc; i++)
2156 {
2157 struct wordsplit_node *newnode;
2158 char *newstr;
2159
2160 if (wsnode_new (wsp, &newnode))
2161 return 1;
2162 newstr = strdup (g.gl_pathv[i]);
2163 if (!newstr)
2164 return _wsplt_nomem (wsp);
2165 newnode->v.word = newstr;
2166 newnode->flags |= _WSNF_WORD|_WSNF_QUOTE;
2167 wsnode_insert (wsp, newnode, prev, 0);
2168 prev = newnode;
2169 }
2170 globfree (&g);
2171
2172 wsnode_remove (wsp, p);
2173 wsnode_free (p);
2174 }
2175 }
2176 free (pattern);
2177 return 0;
2178}
2179
/* Skip a run of sed-style substitution expressions ("sXpatXreplXflags",
   X being a punctuation character) separated by semicolons, starting at
   offset I of COMMAND, whose length is LEN.  Within the pattern and the
   replacement a backslash protects the next character; the flags part
   consists of alphanumeric characters.  Returns the offset at which
   scanning stopped. */
static int
skip_sed_expr (const char *command, size_t i, size_t len)
{
  for (;;)
    {
      int delim;
      int ndelims;   /* delimiters seen so far in this expression */

      if (command[i] == ';')
        i++;
      if (command[i] != 's' || i + 3 >= len || !ISPUNCT (command[i + 1]))
        break;

      delim = command[++i];
      ndelims = 1;
      i++;
      while (i < len)
        {
          if (ndelims == 3)
            {
              /* In the flags part: stop at the first non-flag. */
              if (command[i] == delim || !ISALNUM (command[i]))
                break;
            }
          else if (command[i] == '\\')
            i++;
          else if (command[i] == delim)
            ndelims++;
          i++;
        }

      /* Go on only if a complete expression is followed by ';'. */
      if (!(ndelims == 3 && i < len && command[i] == ';'))
        break;
    }
  return i;
}
2212
2213/* wsp->ws_endp points to a delimiter character. If RETURN_DELIMS
2214 is true, return its value, otherwise return the index past it. */
2215static inline size_t
2216skip_delim_internal (struct wordsplit *wsp, int return_delims)
2217{
2218 return return_delims ? wsp->ws_endp : wsp->ws_endp + 1;
2219}
2220
/* Return the offset at which the next word scan starts, deciding
   whether to keep the delimiter by means of WSP_RETURN_DELIMS. */
static inline size_t
skip_delim (struct wordsplit *wsp)
{
  int keep_delim = WSP_RETURN_DELIMS (wsp);

  return skip_delim_internal (wsp, keep_delim);
}
2226
2227static inline size_t
2228skip_delim_real (struct wordsplit *wsp)
2229{
2230 return skip_delim_internal (wsp, wsp->ws_flags & WRDSF_RETURN_DELIMS);
2231}
2232
/* Return codes of scan_word: */
/* End of input reached, or the scan must stop after this word. */
#define _WRDS_EOF   0
/* A word was scanned; more input may follow. */
#define _WRDS_OK    1
/* Scanning failed. */
#define _WRDS_ERR   2
2236
2237static int
2238scan_qstring (struct wordsplit *wsp, size_t start, size_t *end)
2239{
2240 size_t j;
2241 const char *command = wsp->ws_input;
2242 size_t len = wsp->ws_len;
2243 char q = command[start];
2244
2245 for (j = start + 1; j < len && command[j] != q; j++)
2246 if (q == '"' && command[j] == '\\')
2247 j++;
2248 if (j < len && command[j] == q)
2249 {
2250 int flags = _WSNF_QUOTE | _WSNF_EMPTYOK;
2251 if (q == '\'')
2252 flags |= _WSNF_NOEXPAND;
2253 if (wordsplit_add_segm (wsp, start + 1, j, flags))
2254 return _WRDS_ERR;
2255 *end = j;
2256 }
2257 else
2258 {
2259 wsp->ws_endp = start;
2260 _wsplt_seterr (wsp, WRDSE_QUOTE);
2261 return _WRDS_ERR;
2262 }
2263 return 0;
2264}
2265
2266static int
2267scan_word (struct wordsplit *wsp, size_t start, int consume_all)
2268{
2269 size_t len = wsp->ws_len;
2270 const char *command = wsp->ws_input;
2271 const char *comment = wsp->ws_comment;
2272 int join = 0;
2273 int flags = 0;
2274 struct wordsplit_node *np = wsp->ws_tail;
2275
2276 size_t i = start;
2277
2278 if (i >= len)
2279 {
2280 wsp->ws_errno = WRDSE_EOF;
2281 return _WRDS_EOF;
2282 }
2283
2284 start = i;
2285
2286 if (wsp->ws_flags & WRDSF_SED_EXPR
2287 && command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1]))
2288 {
2289 flags = _WSNF_SEXP;
2290 i = skip_sed_expr (command, i, len);
2291 }
2292 else if (consume_all || !ISDELIM (wsp, command[i]))
2293 {
2294 while (i < len)
2295 {
2296 if (comment && strchr (comment, command[i]) != NULL)
2297 {
2298 size_t j;
2299 for (j = i + 1; j < len && command[j] != '\n'; j++)
2300 ;
2301 if (wordsplit_add_segm (wsp, start, i, 0))
2302 return _WRDS_ERR;
2303 wsp->ws_endp = j;
2304 return _WRDS_OK;
2305 }
2306
2307 if (wsp->ws_flags & WRDSF_QUOTE)
2308 {
2309 if (command[i] == '\\')
2310 {
2311 if (++i == len)
2312 break;
2313 i++;
2314 continue;
2315 }
2316
2317 if (((wsp->ws_flags & WRDSF_SQUOTE) && command[i] == '\'') ||
2318 ((wsp->ws_flags & WRDSF_DQUOTE) && command[i] == '"'))
2319 {
2320 if (join && wsp->ws_tail)
2321 wsp->ws_tail->flags |= _WSNF_JOIN;
2322 if (wordsplit_add_segm (wsp, start, i, _WSNF_JOIN))
2323 return _WRDS_ERR;
2324 if (scan_qstring (wsp, i, &i))
2325 return _WRDS_ERR;
2326 start = i + 1;
2327 join = 1;
2328 }
2329 }
2330
2331 if (command[i] == '$')
2332 {
2333 if ((!(wsp->ws_flags & WRDSF_NOVAR)
2334 || (wsp->ws_options & WRDSO_NOVARSPLIT))
2335 && command[i+1] == '{'
2336 && find_closing_paren (command, i + 2, len, &i, "{}") == 0)
2337 continue;
2338 if ((!(wsp->ws_flags & WRDSF_NOCMD)
2339 || (wsp->ws_options & WRDSO_NOCMDSPLIT))
2340 && command[i+1] == '('
2341 && find_closing_paren (command, i + 2, len, &i, "()") == 0)
2342 continue;
2343 }
2344
2345 if (!consume_all && ISDELIM (wsp, command[i]))
2346 break;
2347 else
2348 i++;
2349 }
2350 }
2351 else if (WSP_RETURN_DELIMS (wsp))
2352 {
2353 i++;
2354 flags |= _WSNF_DELIM;
2355 }
2356 else if (!(wsp->ws_flags & WRDSF_SQUEEZE_DELIMS))
2357 flags |= _WSNF_EMPTYOK;
2358
2359 if (join && i > start && wsp->ws_tail)
2360 wsp->ws_tail->flags |= _WSNF_JOIN;
2361 if (wordsplit_add_segm (wsp, start, i, flags))
2362 return _WRDS_ERR;
2363 wsp->ws_endp = i;
2364 if (wsp->ws_flags & WRDSF_INCREMENTAL)
2365 return _WRDS_EOF;
2366
2367 if (consume_all)
2368 {
2369 if (!np)
2370 np = wsp->ws_head;
2371 while (np)
2372 {
2373 np->flags |= _WSNF_QUOTE;
2374 np = np->next;
2375 }
2376 }
2377
2378 return _WRDS_OK;
2379}
2380
/* Convert at most CNT leading characters of SRC to a number in the
   given BASE.  Conversion stops at the first byte that is above 127 or
   whose to_num value is not below BASE.  The result is stored in *PVAL;
   the number of characters converted is returned. */
static int
xtonum (int *pval, const char *src, int base, int cnt)
{
  int ndigits = 0;
  int acc = 0;

  while (ndigits < cnt)
    {
      int d = ((const unsigned char *) src)[ndigits];

      if (d > 127)
        break;
      d = to_num (d);
      if (d >= base)
        break;
      acc = acc * base + d;
      ndigits++;
    }
  *pval = acc;
  return ndigits;
}
2396
/* Return the number of bytes (not counting a terminating NUL) that
   wordsplit_c_quote_copy produces for STR with the same QUOTE_HEX.
   *QUOTE is set to 1 if STR contains a space or a double quote, and to
   0 otherwise. */
size_t
wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
{
  size_t total = 0;
  const char *s;

  *quote = 0;
  for (s = str; *s; s++)
    {
      if (*s == ' ' || *s == '"')
        *quote = 1;

      if (*s == ' ')
        total += 1;
      else if (*s == '"')
        total += 2;             /* backslash + quote */
      else if (*s != '\t' && *s != '\\' && ISPRINT (*s))
        total += 1;
      else if (quote_hex)
        total += 3;             /* %XX */
      else if (wordsplit_c_quote_char (*s))
        total += 2;             /* backslash + letter */
      else
        total += 4;             /* backslash + three octal digits */
    }
  return total;
}
2426
/* TRANSTAB is a string of character pairs (escape letter, resulting
   character).  Return the resulting character of the first pair whose
   escape letter equals C, or 0 if there is none.  A dangling last
   character without a partner is ignored. */
static int
wsplt_unquote_char (const char *transtab, int c)
{
  const char *pair;

  for (pair = transtab; pair[0] && pair[1]; pair += 2)
    {
      if (pair[0] == c)
        return pair[1];
    }
  return 0;
}
2438
/* Inverse of wsplt_unquote_char: TRANSTAB is a string of character
   pairs (escape letter, resulting character).  Return the escape letter
   of the first pair whose resulting character equals C, or 0 if there
   is none. */
static int
wsplt_quote_char (const char *transtab, int c)
{
  const char *pair = transtab;

  while (pair[0] && pair[1])
    {
      if (pair[1] == c)
        return pair[0];
      pair += 2;
    }
  return 0;
}
2449
2450int
2451wordsplit_c_unquote_char (int c)
2452{
2453 return wsplt_unquote_char (wordsplit_c_escape_tab, c);
2454}
2455
2456int
2457wordsplit_c_quote_char (int c)
2458{
2459 return wsplt_quote_char (wordsplit_c_escape_tab, c);
2460}
2461
2462void
2463wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
2464 char *dst, const char *src, size_t n)
2465{
2466 int i = 0;
2467 int c;
2468
2469 inquote = !!inquote;
2470 while (i < n)
2471 {
2472 if (src[i] == '\\')
2473 {
2474 ++i;
2475 if (WRDSO_ESC_TEST (ws, inquote, WRDSO_XESC)
2476 && (src[i] == 'x' || src[i] == 'X'))
2477 {
2478 if (n - i < 2)
2479 {
2480 *dst++ = '\\';
2481 *dst++ = src[i++];
2482 }
2483 else
2484 {
2485 int off = xtonum (&c, src + i + 1,
2486 16, 2);
2487 if (off == 0)
2488 {
2489 *dst++ = '\\';
2490 *dst++ = src[i++];
2491 }
2492 else
2493 {
2494 *dst++ = c;
2495 i += off + 1;
2496 }
2497 }
2498 }
2499 else if (WRDSO_ESC_TEST (ws, inquote, WRDSO_OESC)
2500 && (unsigned char) src[i] < 128 && ISDIGIT (src[i]))
2501 {
2502 if (n - i < 1)
2503 {
2504 *dst++ = '\\';
2505 *dst++ = src[i++];
2506 }
2507 else
2508 {
2509 int off = xtonum (&c, src + i, 8, 3);
2510 if (off == 0)
2511 {
2512 *dst++ = '\\';
2513 *dst++ = src[i++];
2514 }
2515 else
2516 {
2517 *dst++ = c;
2518 i += off;
2519 }
2520 }
2521 }
2522 else if ((c = wsplt_unquote_char (ws->ws_escape[inquote], src[i])))
2523 {
2524 *dst++ = c;
2525 ++i;
2526 }
2527 else
2528 {
2529 if (WRDSO_ESC_TEST (ws, inquote, WRDSO_BSKEEP))
2530 *dst++ = '\\';
2531 *dst++ = src[i++];
2532 }
2533 }
2534 else
2535 *dst++ = src[i++];
2536 }
2537 *dst = 0;
2538}
2539
/* Write a quoted representation of SRC to DST.  A double quote is
   preceded by a backslash; printable characters other than tab and
   backslash are copied unchanged.  Every other character is written as
   %XX (two upper-case hex digits) if QUOTE_HEX is non-zero; otherwise as
   a backslash followed by its escape letter from wordsplit_c_quote_char
   or, failing that, by three octal digits.
   No terminating NUL is written to DST. */
void
wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
{
  char *out = dst;
  const char *in;

  for (in = src; *in; in++)
    {
      char tmp[4];
      int esc;

      if (*in == '"')
        {
          *out++ = '\\';
          *out++ = *in;
          continue;
        }

      if (*in != '\t' && *in != '\\' && ISPRINT (*in))
        {
          *out++ = *in;
          continue;
        }

      if (quote_hex)
        {
          snprintf (tmp, sizeof tmp, "%%%02X", *(const unsigned char *) in);
          memcpy (out, tmp, 3);
          out += 3;
          continue;
        }

      esc = wordsplit_c_quote_char (*in);
      *out++ = '\\';
      if (esc)
        *out++ = esc;
      else
        {
          snprintf (tmp, sizeof tmp, "%03o", *(const unsigned char *) in);
          memcpy (out, tmp, 3);
          out += 3;
        }
    }
}
2578
2579
/* Descriptor of a single expansion phase. */
struct exptab
{
  /* Textual description of the phase (for debugging). */
  char const *descr;
  /* WRDSF_ bit(s) tested against ws_flags to decide whether to run. */
  int flag;
  /* Entry-specific options (see the EXPOPT_ flags below). */
  int opt;
  /* Expansion function; a non-zero return stops processing.  NULL if
     the entry has no function of its own. */
  int (*expansion) (struct wordsplit *wsp);
};
2588
/* Options stored in the opt member of struct exptab: */

/* By default an exptab entry runs when its flag bit is set in struct
   wordsplit.  EXPOPT_NEG inverts the test: the entry runs when its flag
   bit is NOT set. */
#define EXPOPT_NEG      0x01
/* The entry matches only if all bits of flag are set.
   (The name is spelled EXPORT_, not EXPOPT_; it is kept as is because
   other code refers to it.) */
#define EXPORT_ALLOF    0x02
/* Coalesce the node list before running the expansion. */
#define EXPOPT_COALESCE 0x04
2598
2599static struct exptab exptab[] = {
2600 { N_("WS trimming"), WRDSF_WS, 0,
2601 wordsplit_trimws },
2602 { N_("command substitution"), WRDSF_NOCMD, EXPOPT_NEG|EXPOPT_COALESCE,
2603 wordsplit_cmdexp },
2604 { N_("coalesce list"), 0, EXPOPT_NEG|EXPOPT_COALESCE,
2605 NULL },
2606 { N_("tilde expansion"), WRDSF_PATHEXPAND, 0,
2607 wordsplit_tildexpand },
2608 { N_("variable expansion"), WRDSF_NOVAR, EXPOPT_NEG,
2609 wordsplit_varexp },
2610 { N_("quote removal"), 0, EXPOPT_NEG,
2611 wsnode_quoteremoval },
2612 { N_("coalesce list"), 0, EXPOPT_NEG|EXPOPT_COALESCE,
2613 NULL },
2614 { N_("path expansion"), WRDSF_PATHEXPAND, 0,
2615 wordsplit_pathexpand },
2616 { NULL }
2617};
2618
2619static inline int
2620exptab_matches(struct exptab *p, struct wordsplit *wsp)
2621{
2622 int result;
2623
2624 result = (wsp->ws_flags & p->flag);
2625 if (p->opt & EXPORT_ALLOF)
2626 result = result == p->flag;
2627 if (p->opt & EXPOPT_NEG)
2628 result = !result;
2629
2630 return result;
2631}
2632
2633static int
2634wordsplit_process_list (struct wordsplit *wsp, size_t start)
2635{
2636 struct exptab *p;
2637
2638 if (wsp->ws_flags & WRDSF_SHOWDBG)
2639 wsp->ws_debug (_("(%02d) Input:%.*s;"),
2640 wsp->ws_lvl, (int) wsp->ws_len, wsp->ws_input);
2641
2642 if ((wsp->ws_flags & WRDSF_NOSPLIT)
2643 || ((wsp->ws_options & WRDSO_MAXWORDS)
2644 && wsp->ws_wordi + 1 == wsp->ws_maxwords))
2645 {
2646 /* Treat entire input as a single word */
2647 if (scan_word (wsp, start, 1) == _WRDS_ERR)
2648 return wsp->ws_errno;
2649 }
2650 else
2651 {
2652 int rc;
2653
2654 while ((rc = scan_word (wsp, start, 0)) == _WRDS_OK)
2655 start = skip_delim (wsp);
2656 /* Make sure tail element is not joinable */
2657 if (wsp->ws_tail)
2658 wsp->ws_tail->flags &= ~_WSNF_JOIN;
2659 if (rc == _WRDS_ERR)
2660 return wsp->ws_errno;
2661 }
2662
2663 if (wsp->ws_flags & WRDSF_SHOWDBG)
2664 {
2665 wsp->ws_debug ("(%02d) %s", wsp->ws_lvl, _("Initial list:"));
2666 wordsplit_dump_nodes (wsp);
2667 }
2668
2669 for (p = exptab; p->descr; p++)
2670 {
2671 if (exptab_matches(p, wsp))
2672 {
2673 if (p->opt & EXPOPT_COALESCE)
2674 {
2675 if (wsnode_coalesce (wsp))
2676 break;
2677 if (wsp->ws_flags & WRDSF_SHOWDBG)
2678 {
2679 wsp->ws_debug ("(%02d) %s", wsp->ws_lvl,
2680 _("Coalesced list:"));
2681 wordsplit_dump_nodes (wsp);
2682 }
2683 }
2684 if (p->expansion)
2685 {
2686 if (p->expansion (wsp))
2687 break;
2688 if (wsp->ws_flags & WRDSF_SHOWDBG)
2689 {
2690 wsp->ws_debug ("(%02d) %s", wsp->ws_lvl, _(p->descr));
2691 wordsplit_dump_nodes (wsp);
2692 }
2693 }
2694 }
2695 }
2696
2697 return wsp->ws_errno;
2698}
2699
2700static int
2701wordsplit_run (const char *command, size_t length, struct wordsplit *wsp,
2702 int flags, int lvl)
2703{
2704 int rc;
2705 size_t start;
2706
2707 /* Initialize error context early */
2708 wsp->ws_errctx = NULL;
2709 if (!command)
2710 {
2711 if (!(flags & WRDSF_INCREMENTAL))
2712 return _wsplt_seterr (wsp, WRDSE_USAGE);
2713
2714 if (wsp->ws_head)
2715 return wordsplit_finish (wsp);
2716
2717 start = skip_delim_real (wsp);
2718 if (wsp->ws_endp == wsp->ws_len)
2719 return _wsplt_seterr (wsp, WRDSE_NOINPUT);
2720
2721 wsp->ws_flags |= WRDSF_REUSE;
2722 wordsplit_init0 (wsp);
2723 }
2724 else
2725 {
2726 start = 0;
2727 rc = wordsplit_init (wsp, command, length, flags);
2728 if (rc)
2729 return rc;
2730 wsp->ws_lvl = lvl;
2731 }
2732
2733 rc = wordsplit_process_list (wsp, start);
2734 if (rc)
2735 return rc;
2736 return wordsplit_finish (wsp);
2737}
2738
/* Split the first LENGTH bytes of COMMAND into words according to
   FLAGS, storing the result in WSP.  Runs at nesting level 0.
   Returns 0 on success or an error code. */
int
wordsplit_len (const char *command, size_t length, struct wordsplit *wsp,
               int flags)
{
  int status = wordsplit_run (command, length, wsp, flags, 0);

  return status;
}
2745
/* Split the NUL-terminated string COMMAND into words according to
   FLAGS, storing the result in WS.  A NULL COMMAND is passed on with a
   length of 0.  Returns 0 on success or an error code. */
int
wordsplit (const char *command, struct wordsplit *ws, int flags)
{
  size_t length = 0;

  if (command)
    length = strlen (command);
  return wordsplit_len (command, length, ws, flags);
}
2751
2752void
2753wordsplit_free_words (struct wordsplit *ws)
2754{
2755 size_t i;
2756
2757 for (i = 0; i < ws->ws_wordc; i++)
2758 {
2759 char *p = ws->ws_wordv[ws->ws_offs + i];
2760 if (p)
2761 {
2762 free (p);
2763 ws->ws_wordv[ws->ws_offs + i] = NULL;
2764 }
2765 }
2766 ws->ws_wordc = 0;
2767}
2768
2769void
2770wordsplit_free_envbuf (struct wordsplit *ws)
2771{
2772 if (!(ws->ws_flags & WRDSF_ENV))
2773 return;
2774 if (ws->ws_envbuf)
2775 {
2776 size_t i;
2777
2778 for (i = 0; ws->ws_envbuf[i]; i++)
2779 free (ws->ws_envbuf[i]);
2780 free (ws->ws_envbuf);
2781 ws->ws_envidx = ws->ws_envsiz = 0;
2782 ws->ws_envbuf = NULL;
2783 }
2784}
2785
2786void
2787wordsplit_free_parambuf (struct wordsplit *ws)
2788{
2789 if (!(ws->ws_options & WRDSO_PARAMV))
2790 return;
2791 if (ws->ws_parambuf)
2792 {
2793 size_t i;
2794
2795 for (i = 0; ws->ws_parambuf[i]; i++)
2796 free (ws->ws_parambuf[i]);
2797 free (ws->ws_parambuf);
2798 ws->ws_paramidx = ws->ws_paramsiz = 0;
2799 ws->ws_parambuf = NULL;
2800 }
2801}
2802
2803void
2804wordsplit_clearerr (struct wordsplit *ws)
2805{
2806 if (ws->ws_errno == WRDSE_USERERR)
2807 free (ws->ws_usererr);
2808 ws->ws_usererr = NULL;
2809
2810 free (ws->ws_errctx);
2811 ws->ws_errctx = NULL;
2812
2813 ws->ws_errno = WRDSE_OK;
2814}
2815
2816void
2817wordsplit_free (struct wordsplit *ws)
2818{
2819 if (ws->ws_errno == WRDSE_USAGE)
2820 /* Usage error: the structure is not properly initialized and there's
2821 nothing to free. */
2822 return;
2823 wordsplit_clearerr (ws);
2824 wordsplit_free_nodes (ws);
2825 wordsplit_free_words (ws);
2826 free (ws->ws_wordv);
2827 ws->ws_wordv = NULL;
2828 wordsplit_free_envbuf (ws);
2829 wordsplit_free_parambuf (ws);
2830}
2831
2832int
2833wordsplit_get_words (struct wordsplit *ws, size_t *wordc, char ***wordv)
2834{
2835 char **p = realloc (ws->ws_wordv,
2836 (ws->ws_wordc + 1) * sizeof (ws->ws_wordv[0]));
2837 if (!p)
2838 return -1;
2839 *wordv = p;
2840 *wordc = ws->ws_wordc;
2841
2842 ws->ws_wordv = NULL;
2843 ws->ws_wordc = 0;
2844 ws->ws_wordn = 0;
2845
2846 return 0;
2847}
2848
2849const char *_wordsplit_errstr[] = {
2850 N_("no error"),
2851 N_("missing closing quote"),
2852 N_("memory exhausted"),
2853 N_("invalid wordsplit usage"),
2854 N_("unbalanced curly brace"),
2855 N_("undefined variable"),
2856 N_("input exhausted"),
2857 N_("unbalanced parenthesis"),
2858 N_("globbing error"),
2859 N_("user-defined error"),
2860 N_("invalid parameter number in assignment")
2861};
2862int _wordsplit_nerrs =
2863 sizeof (_wordsplit_errstr) / sizeof (_wordsplit_errstr[0]);
2864
2865const char *
2866wordsplit_strerror (struct wordsplit *ws)
2867{
2868 if (ws->ws_errno == WRDSE_USERERR)
2869 return ws->ws_usererr;
2870 if (ws->ws_errno < _wordsplit_nerrs)
2871 return _wordsplit_errstr[ws->ws_errno];
2872 return N_("unknown error");
2873}
2874
2875void
2876wordsplit_perror (struct wordsplit *wsp)
2877{
2878 switch (wsp->ws_errno)
2879 {
2880 case WRDSE_QUOTE:
2881 wsp->ws_error (_("missing closing %c (start near #%lu)"),
2882 wsp->ws_input[wsp->ws_endp],
2883 (unsigned long) wsp->ws_endp);
2884 break;
2885
2886 default:
2887 if (wsp->ws_errctx)
2888 wsp->ws_error ("%s: %s", wordsplit_strerror (wsp), wsp->ws_errctx);
2889 else
2890 wsp->ws_error ("%s", wordsplit_strerror (wsp));
2891 }
2892}

Return to:

Send suggestions and report system problems to the System administrator.