diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2013-07-10 15:16:05 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2013-07-12 11:50:07 +0300 |
commit | 8afb551c895d6870b0d4f427fa8205ae45ad0bf9 (patch) | |
tree | 6e208d7a54d2845035b366a906b35b390ccce96a /src/wordsplit.c | |
download | vmod-dbrw-8afb551c895d6870b0d4f427fa8205ae45ad0bf9.tar.gz vmod-dbrw-8afb551c895d6870b0d4f427fa8205ae45ad0bf9.tar.bz2 |
Initial commit
Diffstat (limited to 'src/wordsplit.c')
-rw-r--r-- | src/wordsplit.c | 1624 |
1 files changed, 1624 insertions, 0 deletions
diff --git a/src/wordsplit.c b/src/wordsplit.c new file mode 100644 index 0000000..9047369 --- /dev/null +++ b/src/wordsplit.c | |||
@@ -0,0 +1,1624 @@ | |||
1 | /* wordsplit - a word splitter | ||
2 | Copyright (C) 2009-2012 Sergey Poznyakoff | ||
3 | |||
4 | This program is free software; you can redistribute it and/or modify it | ||
5 | under the terms of the GNU General Public License as published by the | ||
6 | Free Software Foundation; either version 3 of the License, or (at your | ||
7 | option) any later version. | ||
8 | |||
9 | This program is distributed in the hope that it will be useful, | ||
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | GNU General Public License for more details. | ||
13 | |||
14 | You should have received a copy of the GNU General Public License along | ||
15 | with this program. If not, see <http://www.gnu.org/licenses/>. */ | ||
16 | |||
17 | #ifdef HAVE_CONFIG_H | ||
18 | # include <config.h> | ||
19 | #endif | ||
20 | |||
21 | #include <errno.h> | ||
22 | #include <ctype.h> | ||
23 | #include <unistd.h> | ||
24 | #include <stdlib.h> | ||
25 | #include <string.h> | ||
26 | #include <stdio.h> | ||
27 | #include <stdarg.h> | ||
28 | |||
29 | #if ENABLE_NLS | ||
30 | # include <gettext.h> | ||
31 | #else | ||
32 | # define gettext(msgid) msgid | ||
33 | #endif | ||
34 | #define _(msgid) gettext (msgid) | ||
35 | #define N_(msgid) msgid | ||
36 | |||
37 | #include <wordsplit.h> | ||
38 | |||
/* ASCII-only character classification helpers.

   Every macro may evaluate its argument more than once, so never pass an
   expression with side effects.  */

/* True for the three whitespace characters: space, tab and newline.  */
#define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n')
/* True if C occurs in the delimiter string of WS.  strchr also matches
   the terminating null character, so this is true for C == 0 as well.  */
#define ISDELIM(ws,c) \
  (strchr ((ws)->ws_delim, (c)) != NULL)
/* True for ASCII punctuation.  As with ISDELIM, C == 0 matches too.  */
#define ISPUNCT(c) (strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL)
#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z')
#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z')
#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c))
#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9')
/* True for hexadecimal digits: 0-9, a-f and A-F.  The decimal digits are
   tested explicitly, and the null character is excluded because strchr
   would otherwise match the string terminator.  */
#define ISXDIGIT(c) \
  (ISDIGIT(c) || ((c) != 0 && strchr("abcdefABCDEF",(c)) != NULL))
#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c))
/* True for codes from space (32) up to and including 127.  */
#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127)
50 | |||
/* Sizing of the word vector, counted in elements: ALLOC_INIT is the
   smallest size used for the first allocation, ALLOC_INCR the smallest
   step by which an existing vector is grown.  */
enum
  {
    ALLOC_INIT = 128,
    ALLOC_INCR = 128
  };
53 | |||
54 | static void | ||
55 | _wsplt_alloc_die (struct wordsplit *wsp) | ||
56 | { | ||
57 | wsp->ws_error (_("memory exhausted")); | ||
58 | abort (); | ||
59 | } | ||
60 | |||
/* Built-in error reporter, installed by wordsplit_init when the caller
   did not set WRDSF_ERROR.  Formats its arguments printf-style according
   to FMT and writes the result to stderr, followed by a newline.  */
static void
_wsplt_error (const char *fmt, ...)
{
  va_list args;

  va_start (args, fmt);
  vfprintf (stderr, fmt, args);
  va_end (args);

  /* Messages are passed without a trailing newline; supply it here.  */
  putc ('\n', stderr);
}
71 | |||
72 | static void wordsplit_free_nodes (struct wordsplit *); | ||
73 | |||
74 | static int | ||
75 | _wsplt_nomem (struct wordsplit *wsp) | ||
76 | { | ||
77 | errno = ENOMEM; | ||
78 | wsp->ws_errno = WRDSE_NOSPACE; | ||
79 | if (wsp->ws_flags & WRDSF_ENOMEMABRT) | ||
80 | wsp->ws_alloc_die (wsp); | ||
81 | if (wsp->ws_flags & WRDSF_SHOWERR) | ||
82 | wordsplit_perror (wsp); | ||
83 | if (!(wsp->ws_flags & WRDSF_REUSE)) | ||
84 | wordsplit_free (wsp); | ||
85 | wordsplit_free_nodes (wsp); | ||
86 | return wsp->ws_errno; | ||
87 | } | ||
88 | |||
89 | static void | ||
90 | wordsplit_init0 (struct wordsplit *wsp) | ||
91 | { | ||
92 | if (wsp->ws_flags & WRDSF_REUSE) | ||
93 | { | ||
94 | if (!(wsp->ws_flags & WRDSF_APPEND)) | ||
95 | wordsplit_free_words (wsp); | ||
96 | } | ||
97 | else | ||
98 | { | ||
99 | wsp->ws_wordv = NULL; | ||
100 | wsp->ws_wordc = 0; | ||
101 | wsp->ws_wordn = 0; | ||
102 | } | ||
103 | |||
104 | wsp->ws_errno = 0; | ||
105 | wsp->ws_head = wsp->ws_tail = NULL; | ||
106 | } | ||
107 | |||
108 | static int | ||
109 | wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, | ||
110 | int flags) | ||
111 | { | ||
112 | wsp->ws_flags = flags; | ||
113 | |||
114 | if (!(wsp->ws_flags & WRDSF_ALLOC_DIE)) | ||
115 | wsp->ws_alloc_die = _wsplt_alloc_die; | ||
116 | if (!(wsp->ws_flags & WRDSF_ERROR)) | ||
117 | wsp->ws_error = _wsplt_error; | ||
118 | |||
119 | if (!(wsp->ws_flags & WRDSF_NOVAR) | ||
120 | && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR))) | ||
121 | { | ||
122 | errno = EINVAL; | ||
123 | wsp->ws_errno = WRDSE_USAGE; | ||
124 | if (wsp->ws_flags & WRDSF_SHOWERR) | ||
125 | wordsplit_perror (wsp); | ||
126 | return wsp->ws_errno; | ||
127 | } | ||
128 | |||
129 | if (!(wsp->ws_flags & WRDSF_NOCMD)) | ||
130 | { | ||
131 | errno = EINVAL; | ||
132 | wsp->ws_errno = WRDSE_NOSUPP; | ||
133 | if (wsp->ws_flags & WRDSF_SHOWERR) | ||
134 | wordsplit_perror (wsp); | ||
135 | return wsp->ws_errno; | ||
136 | } | ||
137 | |||
138 | if (wsp->ws_flags & WRDSF_SHOWDBG) | ||
139 | { | ||
140 | if (!(wsp->ws_flags & WRDSF_DEBUG)) | ||
141 | { | ||
142 | if (wsp->ws_flags & WRDSF_ERROR) | ||
143 | wsp->ws_debug = wsp->ws_error; | ||
144 | else if (wsp->ws_flags & WRDSF_SHOWERR) | ||
145 | wsp->ws_debug = _wsplt_error; | ||
146 | else | ||
147 | wsp->ws_flags &= ~WRDSF_SHOWDBG; | ||
148 | } | ||
149 | } | ||
150 | |||
151 | wsp->ws_input = input; | ||
152 | wsp->ws_len = len; | ||
153 | |||
154 | if (!(wsp->ws_flags & WRDSF_DOOFFS)) | ||
155 | wsp->ws_offs = 0; | ||
156 | |||
157 | if (!(wsp->ws_flags & WRDSF_DELIM)) | ||
158 | wsp->ws_delim = " \t\n"; | ||
159 | |||
160 | if (!(wsp->ws_flags & WRDSF_COMMENT)) | ||
161 | wsp->ws_comment = NULL; | ||
162 | |||
163 | if (!(wsp->ws_flags & WRDSF_CLOSURE)) | ||
164 | wsp->ws_closure = NULL; | ||
165 | |||
166 | wsp->ws_endp = 0; | ||
167 | |||
168 | wordsplit_init0 (wsp); | ||
169 | |||
170 | return 0; | ||
171 | } | ||
172 | |||
173 | static int | ||
174 | alloc_space (struct wordsplit *wsp, size_t count) | ||
175 | { | ||
176 | size_t offs = (wsp->ws_flags & WRDSF_DOOFFS) ? wsp->ws_offs : 0; | ||
177 | char **ptr; | ||
178 | size_t newalloc; | ||
179 | |||
180 | if (wsp->ws_wordv == NULL) | ||
181 | { | ||
182 | newalloc = offs + count > ALLOC_INIT ? count : ALLOC_INIT; | ||
183 | ptr = calloc (newalloc, sizeof (ptr[0])); | ||
184 | } | ||
185 | else if (wsp->ws_wordn < offs + wsp->ws_wordc + count) | ||
186 | { | ||
187 | newalloc = offs + wsp->ws_wordc + | ||
188 | (count > ALLOC_INCR ? count : ALLOC_INCR); | ||
189 | ptr = realloc (wsp->ws_wordv, newalloc * sizeof (ptr[0])); | ||
190 | } | ||
191 | else | ||
192 | return 0; | ||
193 | |||
194 | if (ptr) | ||
195 | { | ||
196 | wsp->ws_wordn = newalloc; | ||
197 | wsp->ws_wordv = ptr; | ||
198 | } | ||
199 | else | ||
200 | return _wsplt_nomem (wsp); | ||
201 | return 0; | ||
202 | } | ||
203 | |||
204 | |||
/* Flags describing the state of a wordsplit node.  */
enum
  {
    _WSNF_NULL = 0x01,      /* null node (a no-op) */
    _WSNF_WORD = 0x02,      /* the word is stored in v.word */
    _WSNF_QUOTE = 0x04,     /* the text is quoted */
    _WSNF_NOEXPAND = 0x08,  /* the text is not subject to expansion */
    _WSNF_JOIN = 0x10,      /* the node must be joined with the next one */
    _WSNF_SEXP = 0x20,      /* the node is a sed expression */

    /* Special flag: tells wordsplit_add_segm to add the segment even if
       it is empty.  */
    _WSNF_EMPTYOK = 0x0100
  };
216 | |||
/* An element of the doubly-linked list of nodes.  The payload is either
   a pair of positions in ws_input or a pointer to a word; the _WSNF_WORD
   flag marks nodes whose word is held in v.word.  */
struct wordsplit_node
{
  struct wordsplit_node *prev;  /* preceding node in the list */
  struct wordsplit_node *next;  /* following node in the list */
  int flags;                    /* combination of _WSNF_ flags */
  union
  {
    struct
    {
      size_t beg;               /* where the word starts in ws_input */
      size_t end;               /* where the word ends in ws_input */
    } segm;
    char *word;
  } v;
};
232 | |||
233 | static const char * | ||
234 | wsnode_flagstr (int flags) | ||
235 | { | ||
236 | static char retbuf[6]; | ||
237 | char *p = retbuf; | ||
238 | |||
239 | if (flags & _WSNF_WORD) | ||
240 | *p++ = 'w'; | ||
241 | else if (flags & _WSNF_NULL) | ||