diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2019-07-10 20:05:14 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2019-07-10 20:05:14 +0300 |
commit | 45f329477f5bc8b2259cf11b489095ac94b0a813 (patch) | |
tree | 9dea96aa8cbb144a3c8f093fbaf0a7bc478af08e | |
parent | a3e4314631601e98d63a9b702cb31931ec5222b3 (diff) | |
download | vmod-dbrw-45f329477f5bc8b2259cf11b489095ac94b0a813.tar.gz vmod-dbrw-45f329477f5bc8b2259cf11b489095ac94b0a813.tar.bz2 |
Use wordsplit for a submodule
-rw-r--r-- | .gitmodules | 3 | ||||
-rw-r--r-- | Makefile.am | 3 | ||||
-rwxr-xr-x | bootstrap | 2 | ||||
-rw-r--r-- | src/Makefile.am | 12 | ||||
-rw-r--r-- | src/wordsplit.c | 2546 | ||||
-rw-r--r-- | src/wordsplit.h | 271 | ||||
-rw-r--r-- | tests/Makefile.am | 6 | ||||
m--------- | wordsplit | 0 |
8 files changed, 20 insertions, 2823 deletions
diff --git a/.gitmodules b/.gitmodules index 00ae21c..6f6db00 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "acvmod"] path = acvmod url = git://git.gnu.org.ua/acvmod.git +[submodule "wordsplit"] + path = wordsplit + url = git://git.gnu.org.ua/wordsplit.git diff --git a/Makefile.am b/Makefile.am index a5248b8..df797dd 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,8 +1,8 @@ # This file is part of vmod-dbrw -# Copyright (C) 2013-2017 Sergey Poznyakoff +# Copyright (C) 2013-2019 Sergey Poznyakoff # # Vmod-dbrw is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # @@ -16,6 +16,7 @@ ACLOCAL_AMFLAGS = -I m4 -I acvmod SUBDIRS = src doc tests include acvmod/top.am +EXTRA_DIST += wordsplit/wordsplit.c wordsplit/wordsplit.h diff --git a/bootstrap b/bootstrap --- a/bootstrap +++ b/bootstrap @@ -1,9 +1,9 @@ #!/bin/sh for dir in m4 build-aux do test -d $dir || mkdir $dir done git submodule init -git submodule update +git submodule update --init --recursive test -f ChangeLog || touch ChangeLog autoreconf -f -i -s diff --git a/src/Makefile.am b/src/Makefile.am index 6cda3aa..2f23477 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,8 +1,8 @@ # This file is part of vmod-dbrw -# Copyright (C) 2013-2017 Sergey Poznyakoff +# Copyright (C) 2013-2019 Sergey Poznyakoff # # Vmod-dbrw is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # @@ -11,20 +11,26 @@ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with vmod-dbrw. If not, see <http://www.gnu.org/licenses/>. 
-AM_CPPFLAGS=$(VARNISHAPI_CFLAGS) -I$(srcdir) -I$(builddir) +AM_CPPFLAGS=\ + $(VARNISHAPI_CFLAGS)\ + -I$(srcdir)\ + -I$(builddir)\ + -I$(top_srcdir)/wordsplit noinst_LTLIBRARIES = libsql.la libsql_la_SOURCES = \ be.c\ dbrw.h\ - sql.c\ + sql.c +nodist_libsql_la_SOURCES = \ wordsplit.h\ wordsplit.c +VPATH += $(top_srcdir)/wordsplit if USE_MYSQL libsql_la_SOURCES += mysql.c endif if USE_PGSQL diff --git a/src/wordsplit.c b/src/wordsplit.c deleted file mode 100644 index bad59b1..0000000 --- a/src/wordsplit.c +++ /dev/null @@ -1,2546 +0,0 @@ -/* wordsplit - a word splitter - Copyright (C) 2009-2018 Sergey Poznyakoff - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3 of the License, or (at your - option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ - -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif - -#include <errno.h> -#include <ctype.h> -#include <unistd.h> -#include <stdlib.h> -#include <string.h> -#include <stdio.h> -#include <stdarg.h> -#include <pwd.h> -#include <glob.h> - -#if ENABLE_NLS -# include <gettext.h> -#else -# define gettext(msgid) msgid -#endif -#define _(msgid) gettext (msgid) -#define N_(msgid) msgid - -#include <wordsplit.h> - -#define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n') -#define ISDELIM(ws,c) \ - (strchr ((ws)->ws_delim, (c)) != NULL) -#define ISPUNCT(c) (strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL) -#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z') -#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z') -#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c)) -#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9') -#define ISXDIGIT(c) (strchr("abcdefABCDEF", c)!=NULL) -#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c)) -#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127) - -#define ISVARBEG(c) (ISALPHA(c) || c == '_') -#define ISVARCHR(c) (ISALNUM(c) || c == '_') - -#define WSP_RETURN_DELIMS(wsp) \ - ((wsp)->ws_flags & WRDSF_RETURN_DELIMS || ((wsp)->ws_options & WRDSO_MAXWORDS)) - -#define ALLOC_INIT 128 -#define ALLOC_INCR 128 - -static void -_wsplt_alloc_die (struct wordsplit *wsp) -{ - wsp->ws_error ("%s", _("memory exhausted")); - abort (); -} - -static void -_wsplt_error (const char *fmt, ...) 
-{ - va_list ap; - - va_start (ap, fmt); - vfprintf (stderr, fmt, ap); - va_end (ap); - fputc ('\n', stderr); -} - -static void wordsplit_free_nodes (struct wordsplit *); - -static int -_wsplt_seterr (struct wordsplit *wsp, int ec) -{ - wsp->ws_errno = ec; - if (wsp->ws_flags & WRDSF_SHOWERR) - wordsplit_perror (wsp); - return ec; -} - -static int -_wsplt_nomem (struct wordsplit *wsp) -{ - errno = ENOMEM; - wsp->ws_errno = WRDSE_NOSPACE; - if (wsp->ws_flags & WRDSF_ENOMEMABRT) - wsp->ws_alloc_die (wsp); - if (wsp->ws_flags & WRDSF_SHOWERR) - wordsplit_perror (wsp); - if (!(wsp->ws_flags & WRDSF_REUSE)) - wordsplit_free (wsp); - wordsplit_free_nodes (wsp); - return wsp->ws_errno; -} - -static int wordsplit_run (const char *command, size_t length, - struct wordsplit *wsp, - int flags, int lvl); - -static int wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, - int flags); -static int wordsplit_process_list (struct wordsplit *wsp, size_t start); -static int wordsplit_finish (struct wordsplit *wsp); - -static int -_wsplt_subsplit (struct wordsplit *wsp, struct wordsplit *wss, - char const *str, int len, - int flags, int finalize) -{ - int rc; - - wss->ws_delim = wsp->ws_delim; - wss->ws_debug = wsp->ws_debug; - wss->ws_error = wsp->ws_error; - wss->ws_alloc_die = wsp->ws_alloc_die; - - if (!(flags & WRDSF_NOVAR)) - { - wss->ws_env = wsp->ws_env; - wss->ws_getvar = wsp->ws_getvar; - flags |= wsp->ws_flags & (WRDSF_ENV | WRDSF_ENV_KV | WRDSF_GETVAR); - } - if (!(flags & WRDSF_NOCMD)) - { - wss->ws_command = wsp->ws_command; - } - - if ((flags & (WRDSF_NOVAR|WRDSF_NOCMD)) != (WRDSF_NOVAR|WRDSF_NOCMD)) - { - wss->ws_closure = wsp->ws_closure; - flags |= wsp->ws_flags & WRDSF_CLOSURE; - } - - wss->ws_options = wsp->ws_options; - - flags |= WRDSF_DELIM - | WRDSF_ALLOC_DIE - | WRDSF_ERROR - | WRDSF_DEBUG - | (wsp->ws_flags & (WRDSF_SHOWDBG | WRDSF_SHOWERR | WRDSF_OPTIONS)); - - rc = wordsplit_init (wss, str, len, flags); - if (rc) - return rc; - wss->ws_lvl 
= wsp->ws_lvl + 1; - rc = wordsplit_process_list (wss, 0); - if (rc) - { - wordsplit_free_nodes (wss); - return rc; - } - if (finalize) - { - rc = wordsplit_finish (wss); - wordsplit_free_nodes (wss); - } - return rc; -} - -static void -_wsplt_seterr_sub (struct wordsplit *wsp, struct wordsplit *wss) -{ - if (wsp->ws_errno == WRDSE_USERERR) - free (wsp->ws_usererr); - wsp->ws_errno = wss->ws_errno; - if (wss->ws_errno == WRDSE_USERERR) - { - wsp->ws_usererr = wss->ws_usererr; - wss->ws_errno = WRDSE_EOF; - wss->ws_usererr = NULL; - } -} - -static void -wordsplit_init0 (struct wordsplit *wsp) -{ - if (wsp->ws_flags & WRDSF_REUSE) - { - if (!(wsp->ws_flags & WRDSF_APPEND)) - wordsplit_free_words (wsp); - wordsplit_clearerr (wsp); - } - else - { - wsp->ws_wordv = NULL; - wsp->ws_wordc = 0; - wsp->ws_wordn = 0; - } - - wsp->ws_errno = 0; -} - -char wordsplit_c_escape_tab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v"; - -static int -wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, - int flags) -{ - wsp->ws_flags = flags; - - if (!(wsp->ws_flags & WRDSF_ALLOC_DIE)) - wsp->ws_alloc_die = _wsplt_alloc_die; - if (!(wsp->ws_flags & WRDSF_ERROR)) - wsp->ws_error = _wsplt_error; - - if (!(wsp->ws_flags & WRDSF_NOVAR)) - { - /* These will be initialized on first variable assignment */ - wsp->ws_envidx = wsp->ws_envsiz = 0; - wsp->ws_envbuf = NULL; - } - - if (!(wsp->ws_flags & WRDSF_NOCMD)) - { - if (!wsp->ws_command) - { - _wsplt_seterr (wsp, WRDSE_USAGE); - errno = EINVAL; - return wsp->ws_errno; - } - } - - if (wsp->ws_flags & WRDSF_SHOWDBG) - { - if (!(wsp->ws_flags & WRDSF_DEBUG)) - { - if (wsp->ws_flags & WRDSF_ERROR) - wsp->ws_debug = wsp->ws_error; - else if (wsp->ws_flags & WRDSF_SHOWERR) - wsp->ws_debug = _wsplt_error; - else - wsp->ws_flags &= ~WRDSF_SHOWDBG; - } - } - - wsp->ws_input = input; - wsp->ws_len = len; - - if (!(wsp->ws_flags & WRDSF_DOOFFS)) - wsp->ws_offs = 0; - - if (!(wsp->ws_flags & WRDSF_DELIM)) - wsp->ws_delim = " \t\n"; - - if 
(!(wsp->ws_flags & WRDSF_COMMENT)) - wsp->ws_comment = NULL; - - if (!(wsp->ws_flags & WRDSF_CLOSURE)) - wsp->ws_closure = NULL; - - if (!(wsp->ws_flags & WRDSF_OPTIONS)) - wsp->ws_options = 0; - - if (wsp->ws_flags & WRDSF_ESCAPE) - { - if (!wsp->ws_escape[WRDSX_WORD]) - wsp->ws_escape[WRDSX_WORD] = ""; - if (!wsp->ws_escape[WRDSX_QUOTE]) - wsp->ws_escape[WRDSX_QUOTE] = ""; - } - else - { - if (wsp->ws_flags & WRDSF_CESCAPES) - { - wsp->ws_escape[WRDSX_WORD] = wordsplit_c_escape_tab; - wsp->ws_escape[WRDSX_QUOTE] = wordsplit_c_escape_tab; - wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD - | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD; - } - else - { - wsp->ws_escape[WRDSX_WORD] = ""; - wsp->ws_escape[WRDSX_QUOTE] = "\\\\\"\""; - wsp->ws_options |= WRDSO_BSKEEP_QUOTE; - } - } - - wsp->ws_endp = 0; - wsp->ws_wordi = 0; - - if (wsp->ws_flags & WRDSF_REUSE) - wordsplit_free_nodes (wsp); - wsp->ws_head = wsp->ws_tail = NULL; - - wordsplit_init0 (wsp); - - return 0; -} - -static int -alloc_space (struct wordsplit *wsp, size_t count) -{ - size_t offs = (wsp->ws_flags & WRDSF_DOOFFS) ? wsp->ws_offs : 0; - char **ptr; - size_t newalloc; - - if (wsp->ws_wordv == NULL) - { - newalloc = offs + count > ALLOC_INIT ? count : ALLOC_INIT; - ptr = calloc (newalloc, sizeof (ptr[0])); - } - else if (wsp->ws_wordn < offs + wsp->ws_wordc + count) - { - newalloc = offs + wsp->ws_wordc + - (count > ALLOC_INCR ? 
count : ALLOC_INCR); - ptr = realloc (wsp->ws_wordv, newalloc * sizeof (ptr[0])); - } - else - return 0; - - if (ptr) - { - wsp->ws_wordn = newalloc; - wsp->ws_wordv = ptr; - } - else - return _wsplt_nomem (wsp); - return 0; -} - - -/* Node state flags */ -#define _WSNF_NULL 0x01 /* null node (a noop) */ -#define _WSNF_WORD 0x02 /* node contains word in v.word */ -#define _WSNF_QUOTE 0x04 /* text is quoted */ -#define _WSNF_NOEXPAND 0x08 /* text is not subject to expansion */ -#define _WSNF_JOIN 0x10 /* node must be joined with the next node */ -#define _WSNF_SEXP 0x20 /* is a sed expression */ -#define _WSNF_DELIM 0x40 /* node is a delimiter */ - -#define _WSNF_EMPTYOK 0x0100 /* special flag indicating that - wordsplit_add_segm must add the - segment even if it is empty */ - -struct wordsplit_node -{ - struct wordsplit_node *prev; /* Previous element */ - struct wordsplit_node *next; /* Next element */ - int flags; /* Node flags */ - union - { - struct - { - size_t beg; /* Start of word in ws_input */ - size_t end; /* End of word in ws_input */ - } segm; - char *word; - } v; -}; - -static const char * -wsnode_flagstr (int flags) -{ - static char retbuf[7]; - char *p = retbuf; - - if (flags & _WSNF_WORD) - *p++ = 'w'; - else if (flags & _WSNF_NULL) - *p++ = 'n'; - else - *p++ = '-'; - if (flags & _WSNF_QUOTE) - *p++ = 'q'; - else - *p++ = '-'; - if (flags & _WSNF_NOEXPAND) - *p++ = 'E'; - else - *p++ = '-'; - if (flags & _WSNF_JOIN) - *p++ = 'j'; - else - *p++ = '-'; - if (flags & _WSNF_SEXP) - *p++ = 's'; - else - *p++ = '-'; - if (flags & _WSNF_DELIM) - *p++ = 'd'; - else - *p++ = '-'; - *p = 0; - return retbuf; -} - -static const char * -wsnode_ptr (struct wordsplit *wsp, struct wordsplit_node *p) -{ - if (p->flags & _WSNF_NULL) - return ""; - else if (p->flags & _WSNF_WORD) - return p->v.word; - else - return wsp->ws_input + p->v.segm.beg; -} - -static size_t -wsnode_len (struct wordsplit_node *p) -{ - if (p->flags & _WSNF_NULL) - return 0; - else if (p->flags 
& _WSNF_WORD) - return strlen (p->v.word); - else - return p->v.segm.end - p->v.segm.beg; -} - -static int -wsnode_new (struct wordsplit *wsp, struct wordsplit_node **pnode) -{ - struct wordsplit_node *node = calloc (1, sizeof (*node)); - if (!node) - return _wsplt_nomem (wsp); - *pnode = node; - return 0; -} - -static void -wsnode_free (struct wordsplit_node *p) -{ - if (p->flags & _WSNF_WORD) - free (p->v.word); - free (p); -} - -static void -wsnode_append (struct wordsplit *wsp, struct wordsplit_node *node) -{ - node->next = NULL; - node->prev = wsp->ws_tail; - if (wsp->ws_tail) - wsp->ws_tail->next = node; - else - wsp->ws_head = node; - wsp->ws_tail = node; -} - -static void -wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node) -{ - struct wordsplit_node *p; - - p = node->prev; - if (p) - { - p->next = node->next; - if (!node->next) - p->flags &= ~_WSNF_JOIN; - } - else - wsp->ws_head = node->next; - - p = node->next; - if (p) - p->prev = node->prev; - else - wsp->ws_tail = node->prev; - - node->next = node->prev = NULL; -} - -static struct wordsplit_node * -wsnode_tail (struct wordsplit_node *p) -{ - while (p && p->next) - p = p->next; - return p; -} - -static void -wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node, - struct wordsplit_node *anchor, int before) -{ - if (!wsp->ws_head) - { - node->next = node->prev = NULL; - wsp->ws_head = wsp->ws_tail = node; - } - else if (before) - { - if (anchor->prev) - wsnode_insert (wsp, node, anchor->prev, 0); - else - { - struct wordsplit_node *tail = wsnode_tail (node); - node->prev = NULL; - tail->next = anchor; - anchor->prev = tail; - wsp->ws_head = node; - } - } - else - { - struct wordsplit_node *p; - struct wordsplit_node *tail = wsnode_tail (node); - - p = anchor->next; - if (p) - p->prev = tail; - else - wsp->ws_tail = tail; - tail->next = p; - node->prev = anchor; - anchor->next = node; - } -} - -static int -wordsplit_add_segm (struct wordsplit *wsp, size_t beg, size_t end, int 
flg) -{ - struct wordsplit_node *node; - int rc; - - if (end == beg && !(flg & _WSNF_EMPTYOK)) - return 0; - rc = wsnode_new (wsp, &node); - if (rc) - return rc; - node->flags = flg & ~(_WSNF_WORD | _WSNF_EMPTYOK); - node->v.segm.beg = beg; - node->v.segm.end = end; - wsnode_append (wsp, node); - return 0; -} - -static void -wordsplit_free_nodes (struct wordsplit *wsp) -{ - struct wordsplit_node *p; - - for (p = wsp->ws_head; p;) - { - struct wordsplit_node *next = p->next; - wsnode_free (p); - p = next; - } - wsp->ws_head = wsp->ws_tail = NULL; -} - -static void -wordsplit_dump_nodes (struct wordsplit *wsp) -{ - struct wordsplit_node *p; - int n = 0; - - for (p = wsp->ws_head, n = 0; p; p = p->next, n++) - { - if (p->flags & _WSNF_WORD) - wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%s;", - wsp->ws_lvl, - n, p, p->flags, wsnode_flagstr (p->flags), p->v.word); - else - wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%.*s;", - wsp->ws_lvl, - n, p, p->flags, wsnode_flagstr (p->flags), - (int) (p->v.segm.end - p->v.segm.beg), - wsp->ws_input + p->v.segm.beg); - } -} - -static int -coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node) -{ - struct wordsplit_node *p, *end; - size_t len = 0; - char *buf, *cur; - int stop; - - if (!(node->flags & _WSNF_JOIN)) - return 0; - - for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next) - { - len += wsnode_len (p); - } - if (p) - len += wsnode_len (p); - end = p; - - buf = malloc (len + 1); - if (!buf) - return _wsplt_nomem (wsp); - cur = buf; - - p = node; - for (stop = 0; !stop;) - { - struct wordsplit_node *next = p->next; - const char *str = wsnode_ptr (wsp, p); - size_t slen = wsnode_len (p); - - memcpy (cur, str, slen); - cur += slen; - if (p != node) - { - node->flags |= p->flags & _WSNF_QUOTE; - wsnode_remove (wsp, p); - stop = p == end; - wsnode_free (p); - } - p = next; - } - - *cur = 0; - - node->flags &= ~_WSNF_JOIN; - - if (node->flags & _WSNF_WORD) - free (node->v.word); - else - node->flags |= 
_WSNF_WORD; - node->v.word = buf; - return 0; -} - -static void wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote, - char *dst, const char *src, - size_t n); - -static int -wsnode_quoteremoval (struct wordsplit *wsp) -{ - struct wordsplit_node *p; - - for (p = wsp->ws_head; p; p = p->next) - { - const char *str = wsnode_ptr (wsp, p); - size_t slen = wsnode_len (p); - int unquote; - - if (wsp->ws_flags & WRDSF_QUOTE) - unquote = !(p->flags & _WSNF_NOEXPAND); - else - unquote = 0; - - if (unquote) - { - if (!(p->flags & _WSNF_WORD)) - { - char *newstr = malloc (slen + 1); - if (!newstr) - return _wsplt_nomem (wsp); - memcpy (newstr, str, slen); - newstr[slen] = 0; - p->v.word = newstr; - p->flags |= _WSNF_WORD; - } - - wordsplit_string_unquote_copy (wsp, p->flags & _WSNF_QUOTE, - p->v.word, str, slen); - } - } - return 0; -} - -static int -wsnode_coalesce (struct wordsplit *wsp) -{ - struct wordsplit_node *p; - - for (p = wsp->ws_head; p; p = p->next) - { - if (p->flags & _WSNF_JOIN) - if (coalesce_segment (wsp, p)) - return 1; - } - return 0; -} - -static int -wsnode_tail_coalesce (struct wordsplit *wsp, struct wordsplit_node *p) -{ - if (p->next) - { - struct wordsplit_node *np = p; - while (np && np->next) - { - np->flags |= _WSNF_JOIN; - np = np->next; - } - if (coalesce_segment (wsp, p)) - return 1; - } - return 0; -} - -static size_t skip_delim (struct wordsplit *wsp); - -static int -wordsplit_finish (struct wordsplit *wsp) -{ - struct wordsplit_node *p; - size_t n; - int delim; - - /* Postprocess delimiters. It would be rather simple, if it weren't for - the incremental operation. - - Nodes of type _WSNF_DELIM get inserted to the node list if either - WRDSF_RETURN_DELIMS flag or WRDSO_MAXWORDS option is set. - - The following cases should be distinguished: - - 1. If both WRDSF_SQUEEZE_DELIMS and WRDSF_RETURN_DELIMS are set, compress - any runs of similar delimiter nodes to a single node. 
The nodes are - 'similar' if they point to the same delimiter character. - - If WRDSO_MAXWORDS option is set, stop compressing when - ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into - a single last node. - - 2. If WRDSO_MAXWORDS option is set, but WRDSF_RETURN_DELIMS is not, - remove any delimiter nodes. Stop operation when - ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into - a single last node. - - 3. If incremental operation is in progress, restart the loop any time - a delimiter node is about to be returned, unless WRDSF_RETURN_DELIMS - is set. - */ - again: - delim = 0; /* Delimiter being processed (if any) */ - n = 0; /* Number of words processed so far */ - p = wsp->ws_head; /* Current node */ - - while (p) - { - struct wordsplit_node *next = p->next; - if (p->flags & _WSNF_DELIM) - { - if (wsp->ws_flags & WRDSF_RETURN_DELIMS) - { - if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS) - { - char const *s = wsnode_ptr (wsp, p); - if (delim) - { - if (delim == *s) - { - wsnode_remove (wsp, p); - p = next; - continue; - } - else - { - delim = 0; - n++; /* Count this node; it will be returned */ - } - } - else - { - delim = *s; - p = next; - continue; - } - } - } - else if (wsp->ws_options & WRDSO_MAXWORDS) - { - wsnode_remove (wsp, p); - p = next; - continue; - } - } - else - { - if (delim) - { - /* Last node was a delimiter or a compressed run of delimiters; - Count it, and clear the delimiter marker */ - n++; - delim = 0; - } - if (wsp->ws_options & WRDSO_MAXWORDS) - { - if (wsp->ws_wordi + n + 1 == wsp->ws_maxwords) - break; - } - } - n++; - if (wsp->ws_flags & WRDSF_INCREMENTAL) - p = NULL; /* Break the loop */ - else - p = next; - } - - if (p) - { - /* We're here if WRDSO_MAXWORDS is in effect and wsp->ws_maxwords - words have already been collected. Reconstruct a single final - node from the remaining nodes. 
*/ - if (wsnode_tail_coalesce (wsp, p)) - return wsp->ws_errno; - n++; - } - - if (n == 0 && (wsp->ws_flags & WRDSF_INCREMENTAL)) - { - /* The loop above have eliminated all nodes. Restart the - processing, if there's any input left. */ - if (wsp->ws_endp < wsp->ws_len) - { - int rc; - if (wsp->ws_flags & WRDSF_SHOWDBG) - wsp->ws_debug (_("Restarting")); - rc = wordsplit_process_list (wsp, skip_delim (wsp)); - if (rc) - return rc; - } - else - { - wsp->ws_error = WRDSE_EOF; - return WRDSE_EOF; - } - goto again; - } - - if (alloc_space (wsp, n + 1)) - return wsp->ws_errno; - - while (wsp->ws_head) - { - const char *str = wsnode_ptr (wsp, wsp->ws_head); - size_t slen = wsnode_len (wsp->ws_head); - char *newstr = malloc (slen + 1); - - /* Assign newstr first, even if it is NULL. This way - wordsplit_free will work even if we return - nomem later. */ - wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = newstr; - if (!newstr) - return _wsplt_nomem (wsp); - memcpy (newstr, str, slen); - newstr[slen] = 0; - - wsnode_remove (wsp, wsp->ws_head); - - wsp->ws_wordc++; - wsp->ws_wordi++; - - if (wsp->ws_flags & WRDSF_INCREMENTAL) - break; - } - wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL; - return 0; -} - -int -wordsplit_append (wordsplit_t *wsp, int argc, char **argv) -{ - int rc; - size_t i; - - rc = alloc_space (wsp, wsp->ws_wordc + argc + 1); - if (rc) - return rc; - for (i = 0; i < argc; i++) - { - char *newstr = strdup (argv[i]); - if (!newstr) - { - while (i > 0) - { - free (wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i - 1]); - wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i - 1] = NULL; - i--; - } - return _wsplt_nomem (wsp); - } - wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i] = newstr; - } - wsp->ws_wordc += i; - wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL; - return 0; -} - -/* Variable expansion */ -static int -node_split_prefix (struct wordsplit *wsp, - struct wordsplit_node **ptail, - struct wordsplit_node *node, - size_t beg, size_t len, int flg) -{ - 
struct wordsplit_node *newnode; - - if (len == 0) - return 0; - if (wsnode_new (wsp, &newnode)) - return 1; - wsnode_insert (wsp, newnode, *ptail, 0); - if (node->flags & _WSNF_WORD) - { - const char *str = wsnode_ptr (wsp, node); - char *newstr = malloc (len + 1); - if (!newstr) - return _wsplt_nomem (wsp); - memcpy (newstr, str + beg, len); - newstr[len] = 0; - newnode->flags = _WSNF_WORD; - newnode->v.word = newstr; - } - else - { - newnode->v.segm.beg = node->v.segm.beg + beg; - newnode->v.segm.end = newnode->v.segm.beg + len; - } - newnode->flags |= flg; - *ptail = newnode; - return 0; -} - -static int -find_closing_paren (const char *str, size_t i, size_t len, size_t *poff, - char const *paren) -{ - enum { st_init, st_squote, st_dquote } state = st_init; - size_t level = 1; - - for (; i < len; i++) - { - switch (state) - { - case st_init: - switch (str[i]) - { - default: - if (str[i] == paren[0]) - { - level++; - break; - } - else if (str[i] == paren[1]) - { - if (--level == 0) - { - *poff = i; - return 0; - } - break; - } - break; - - case '"': - state = st_dquote; - break; - - case '\'': - state = st_squote; - break; - } - break; - - case st_squote: - if (str[i] == '\'') - state = st_init; - break; - - case st_dquote: - if (str[i] == '\\') - i++; - else if (str[i] == '"') - state = st_init; - break; - } - } - return 1; -} - -static int -wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len, - char const **ret) -{ - size_t i; - - if (!(wsp->ws_flags & WRDSF_ENV)) - return WRDSE_UNDEF; - - if (wsp->ws_flags & WRDSF_ENV_KV) - { - /* A key-value pair environment */ - for (i = 0; wsp->ws_env[i]; i++) - { - size_t elen = strlen (wsp->ws_env[i]); - if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0) - { - *ret = wsp->ws_env[i + 1]; - return WRDSE_OK; - } - /* Skip the value. Break the loop if it is NULL. */ - i++; - if (wsp->ws_env[i] == NULL) - break; - } - } - else if (wsp->ws_env) - { - /* Usual (A=B) environment. 
*/ - for (i = 0; wsp->ws_env[i]; i++) - { - size_t j; - const char *var = wsp->ws_env[i]; - - for (j = 0; j < len; j++) - if (name[j] != var[j]) - break; - if (j == len && var[j] == '=') - { - *ret = var + j + 1; - return WRDSE_OK; - } - } - } - return WRDSE_UNDEF; -} - -static int -wsplt_assign_var (struct wordsplit *wsp, const char *name, size_t namelen, - char *value) -{ - int n = (wsp->ws_flags & WRDSF_ENV_KV) ? 2 : 1; - char *v; - - if (wsp->ws_envidx + n >= wsp->ws_envsiz) - { - size_t sz; - char **newenv; - - if (!wsp->ws_envbuf) - { - if (wsp->ws_flags & WRDSF_ENV) - { - size_t i = 0, j; - - if (wsp->ws_env) - { - for (; wsp->ws_env[i]; i++) - ; - } - - sz = i + n + 1; - - newenv = calloc (sz, sizeof(newenv[0])); - if (!newenv) - return _wsplt_nomem (wsp); - - for (j = 0; j < i; j++) - { - newenv[j] = strdup (wsp->ws_env[j]); - if (!newenv[j]) - { - for (; j > 1; j--) - free (newenv[j-1]); - free (newenv[j-1]); - return _wsplt_nomem (wsp); - } - } - newenv[j] = NULL; - - wsp->ws_envbuf = newenv; - wsp->ws_envidx = i; - wsp->ws_envsiz = sz; - wsp->ws_env = (const char**) wsp->ws_envbuf; - } - else - { - newenv = calloc (WORDSPLIT_ENV_INIT, sizeof(newenv[0])); - if (!newenv) - return _wsplt_nomem (wsp); - wsp->ws_envbuf = newenv; - wsp->ws_envidx = 0; - wsp->ws_envsiz = WORDSPLIT_ENV_INIT; - wsp->ws_env = (const char**) wsp->ws_envbuf; - wsp->ws_flags |= WRDSF_ENV; - } - } - else - { - wsp->ws_envsiz *= 2; - newenv = realloc (wsp->ws_envbuf, - wsp->ws_envsiz * sizeof (wsp->ws_envbuf[0])); - if (!newenv) - return _wsplt_nomem (wsp); - wsp->ws_envbuf = newenv; - wsp->ws_env = (const char**) wsp->ws_envbuf; - } - } - - if (wsp->ws_flags & WRDSF_ENV_KV) - { - /* A key-value pair environment */ - |