about | summary | refs | log | tree | commit | diff
path: root/src/wordsplit.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/wordsplit.c')
-rw-r--r-- src/wordsplit.c 1624
1 files changed, 1624 insertions, 0 deletions
diff --git a/src/wordsplit.c b/src/wordsplit.c
new file mode 100644
index 0000000..9047369
--- /dev/null
+++ b/src/wordsplit.c
@@ -0,0 +1,1624 @@
1/* wordsplit - a word splitter
2 Copyright (C) 2009-2012 Sergey Poznyakoff
3
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 3 of the License, or (at your
7 option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along
15 with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17#ifdef HAVE_CONFIG_H
18# include <config.h>
19#endif
20
21#include <errno.h>
22#include <ctype.h>
23#include <unistd.h>
24#include <stdlib.h>
25#include <string.h>
26#include <stdio.h>
27#include <stdarg.h>
28
29#if ENABLE_NLS
30# include <gettext.h>
31#else
32# define gettext(msgid) msgid
33#endif
34#define _(msgid) gettext (msgid)
35#define N_(msgid) msgid
36
37#include <wordsplit.h>
38
/* Locale-independent, ASCII-only character classification helpers.
   Every macro below may evaluate its argument more than once, so never
   pass an expression with side effects (e.g. *p++). */

/* True if C is a space, a horizontal tab or a newline. */
#define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n')
/* True if C occurs in the delimiter string of WS.  Because strchr also
   matches the terminating NUL, ISDELIM (ws, 0) is true. */
#define ISDELIM(ws,c) \
  (strchr ((ws)->ws_delim, (c)) != NULL)
/* True if C is an ASCII punctuation character.  The explicit test for 0
   keeps strchr from reporting the string terminator as punctuation. */
#define ISPUNCT(c) \
  ((c) != 0 && strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL)
/* Range checks.  The cast to unsigned turns negative values into large
   positive ones, so they fall outside every range. */
#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z')
#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z')
#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c))
#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9')
/* True if C is one of the *letters* used as hexadecimal digits (a-f, A-F).
   Decimal digits are deliberately not matched here; test them with ISDIGIT.
   The explicit test for 0 keeps strchr from matching the terminator. */
#define ISXDIGIT(c) ((c) != 0 && strchr("abcdefABCDEF", (c))!=NULL)
#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c))
/* True for codes 32..127.  Note that this range includes DEL (127). */
#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127)

/* Number of word slots allocated initially. */
#define ALLOC_INIT 128
/* Minimal number of word slots added when the word vector grows. */
#define ALLOC_INCR 128
53
54static void
55_wsplt_alloc_die (struct wordsplit *wsp)
56{
57 wsp->ws_error (_("memory exhausted"));
58 abort ();
59}
60
/* Default error reporter.  Formats the printf-style message FMT with the
   trailing arguments, writes it to stderr and appends a newline. */
static void
_wsplt_error (const char *fmt, ...)
{
  va_list args;

  va_start (args, fmt);
  vfprintf (stderr, fmt, args);
  va_end (args);
  fputs ("\n", stderr);
}
71
72static void wordsplit_free_nodes (struct wordsplit *);
73
74static int
75_wsplt_nomem (struct wordsplit *wsp)
76{
77 errno = ENOMEM;
78 wsp->ws_errno = WRDSE_NOSPACE;
79 if (wsp->ws_flags & WRDSF_ENOMEMABRT)
80 wsp->ws_alloc_die (wsp);
81 if (wsp->ws_flags & WRDSF_SHOWERR)
82 wordsplit_perror (wsp);
83 if (!(wsp->ws_flags & WRDSF_REUSE))
84 wordsplit_free (wsp);
85 wordsplit_free_nodes (wsp);
86 return wsp->ws_errno;
87}
88
89static void
90wordsplit_init0 (struct wordsplit *wsp)
91{
92 if (wsp->ws_flags & WRDSF_REUSE)
93 {
94 if (!(wsp->ws_flags & WRDSF_APPEND))
95 wordsplit_free_words (wsp);
96 }
97 else
98 {
99 wsp->ws_wordv = NULL;
100 wsp->ws_wordc = 0;
101 wsp->ws_wordn = 0;
102 }
103
104 wsp->ws_errno = 0;
105 wsp->ws_head = wsp->ws_tail = NULL;
106}
107
108static int
109wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
110 int flags)
111{
112 wsp->ws_flags = flags;
113
114 if (!(wsp->ws_flags & WRDSF_ALLOC_DIE))
115 wsp->ws_alloc_die = _wsplt_alloc_die;
116 if (!(wsp->ws_flags & WRDSF_ERROR))
117 wsp->ws_error = _wsplt_error;
118
119 if (!(wsp->ws_flags & WRDSF_NOVAR)
120 && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR)))
121 {
122 errno = EINVAL;
123 wsp->ws_errno = WRDSE_USAGE;
124 if (wsp->ws_flags & WRDSF_SHOWERR)
125 wordsplit_perror (wsp);
126 return wsp->ws_errno;
127 }
128
129 if (!(wsp->ws_flags & WRDSF_NOCMD))
130 {
131 errno = EINVAL;
132 wsp->ws_errno = WRDSE_NOSUPP;
133 if (wsp->ws_flags & WRDSF_SHOWERR)
134 wordsplit_perror (wsp);
135 return wsp->ws_errno;
136 }
137
138 if (wsp->ws_flags & WRDSF_SHOWDBG)
139 {
140 if (!(wsp->ws_flags & WRDSF_DEBUG))
141 {
142 if (wsp->ws_flags & WRDSF_ERROR)
143 wsp->ws_debug = wsp->ws_error;
144 else if (wsp->ws_flags & WRDSF_SHOWERR)
145 wsp->ws_debug = _wsplt_error;
146 else
147 wsp->ws_flags &= ~WRDSF_SHOWDBG;
148 }
149 }
150
151 wsp->ws_input = input;
152 wsp->ws_len = len;
153
154 if (!(wsp->ws_flags & WRDSF_DOOFFS))
155 wsp->ws_offs = 0;
156
157 if (!(wsp->ws_flags & WRDSF_DELIM))
158 wsp->ws_delim = " \t\n";
159
160 if (!(wsp->ws_flags & WRDSF_COMMENT))
161 wsp->ws_comment = NULL;
162
163 if (!(wsp->ws_flags & WRDSF_CLOSURE))
164 wsp->ws_closure = NULL;
165
166 wsp->ws_endp = 0;
167
168 wordsplit_init0 (wsp);
169
170 return 0;
171}
172
173static int
174alloc_space (struct wordsplit *wsp, size_t count)
175{
176 size_t offs = (wsp->ws_flags & WRDSF_DOOFFS) ? wsp->ws_offs : 0;
177 char **ptr;
178 size_t newalloc;
179
180 if (wsp->ws_wordv == NULL)
181 {
182 newalloc = offs + count > ALLOC_INIT ? count : ALLOC_INIT;
183 ptr = calloc (newalloc, sizeof (ptr[0]));
184 }
185 else if (wsp->ws_wordn < offs + wsp->ws_wordc + count)
186 {
187 newalloc = offs + wsp->ws_wordc +
188 (count > ALLOC_INCR ? count : ALLOC_INCR);
189 ptr = realloc (wsp->ws_wordv, newalloc * sizeof (ptr[0]));
190 }
191 else
192 return 0;
193
194 if (ptr)
195 {
196 wsp->ws_wordn = newalloc;
197 wsp->ws_wordv = ptr;
198 }
199 else
200 return _wsplt_nomem (wsp);
201 return 0;
202}
203
204
/* Flag bits stored in the `flags' member of struct wordsplit_node. */
#define _WSNF_NULL     0x0001   /* null node (a no-op) */
#define _WSNF_WORD     0x0002   /* the node keeps its text in v.word */
#define _WSNF_QUOTE    0x0004   /* the text is quoted */
#define _WSNF_NOEXPAND 0x0008   /* the text is not subject to expansion */
#define _WSNF_JOIN     0x0010   /* the node must be joined with the next
                                   node */
#define _WSNF_SEXP     0x0020   /* the node is a sed expression */

/* Special flag telling wordsplit_add_segm that the segment must be
   added even if it is empty. */
#define _WSNF_EMPTYOK  0x0100
216
/* An element of the doubly-linked list of nodes.  A node refers to its
   text in one of two ways: either as a segment of the input string
   (v.segm), or as a separate string (v.word).  The _WSNF_WORD bit in
   `flags' marks the latter case. */
struct wordsplit_node
{
  struct wordsplit_node *prev;  /* preceding node in the list */
  struct wordsplit_node *next;  /* following node in the list */
  int flags;                    /* combination of _WSNF_ bits */
  union
  {
    struct
    {
      size_t beg;               /* offset in ws_input where the word starts */
      size_t end;               /* offset in ws_input where the word ends */
    } segm;
    char *word;                 /* the word itself (with _WSNF_WORD) */
  } v;
};
232
233static const char *
234wsnode_flagstr (int flags)
235{
236 static char retbuf[6];
237 char *p = retbuf;
238
239 if (flags & _WSNF_WORD)
240 *p++ = 'w';
241 else if (flags & _WSNF_NULL)
242 *p++ = 'n';
243 else
244 *p++ = '-';
245 if (flags & _WSNF_QUOTE)
246 *p++ = 'q';
247 else
248 *p++ = '-';
249 if (flags & _WSNF_NOEXPAND)
250 *p++ = 'E';
251 else
252 *p++ = '-';