aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--NEWS31
-rw-r--r--README3
-rw-r--r--configure.ac2
-rw-r--r--doc/vmod-dbrw.334
-rw-r--r--doc/vmod-dbrw.texi16
-rw-r--r--src/vmod_dbrw.c127
-rw-r--r--src/wordsplit.c1646
-rw-r--r--src/wordsplit.h214
-rw-r--r--tests/initdb.at10
-rw-r--r--tests/rewrite01.at5
-rw-r--r--tests/rewrite02.at5
-rw-r--r--tests/rewrite03.at5
-rw-r--r--tests/rewrite04.at5
-rw-r--r--tests/rewrite05.at5
-rw-r--r--tests/rewrite06.at5
15 files changed, 1664 insertions, 449 deletions
diff --git a/NEWS b/NEWS
index 913b0f9..c700564 100644
--- a/NEWS
+++ b/NEWS
@@ -1,2 +1,2 @@
-vmod-dbrw -- history of user-visible changes. 2018-01-30
+vmod-dbrw -- history of user-visible changes. 2018-12-08
See the end of file for copying conditions.
@@ -5,3 +5,3 @@ Please send vmod-dbrw bug reports to <gray@gnu.org>
-Version 2.2.90 (Git)
+Version 2.2.91 (Git)
@@ -20,2 +20,29 @@ e.g.:
+* The $() functions in SQL templates
+
+The SQL templates support the use of $() constructs for invoking
+built-in functions. So far one function is implemented:
+
+ $(urlprefixes PATH)
+
+It expands to a comma-separated list of properly quoted pathname
+prefixes, constructed from its argument. The optional query part is
+stripped off the argument prior to expansion. For example,
+
+ $(urlprefixes "/local/user/local?a=1")
+
+expands to:
+
+ '/local/user/local','/local/user','/local'
+
+This construct is intended for use in SQL IN conditionals, for
+example:
+
+ SELECT dest,pattern,value,flags
+ FROM rewrite
+ WHERE host='$host'
+ AND url IN ($(urlprefixes $url))
+ ORDER BY length(dest),value,weight DESC
+
+
diff --git a/README b/README
index be4e408..354099f 100644
--- a/README
+++ b/README
@@ -1,3 +1,2 @@
Vmod-dbrw README
-Copyright (C) 2013-2017 Sergey Poznyakoff
See the end of file for copying conditions.
@@ -225,3 +224,3 @@ Send bug reports and suggestions to <gray@gnu.org>
-Copyright (C) 2013-2017 Sergey Poznyakoff
+Copyright (C) 2013-2018 Sergey Poznyakoff
diff --git a/configure.ac b/configure.ac
index 7a1272c..1212a37 100644
--- a/configure.ac
+++ b/configure.ac
@@ -16,3 +16,3 @@
AC_PREREQ(2.69)
-AC_INIT([vmod-dbrw], 2.2.90, [gray@gnu.org])
+AC_INIT([vmod-dbrw], 2.2.91, [gray@gnu.org])
AC_CONFIG_AUX_DIR([build-aux])
diff --git a/doc/vmod-dbrw.3 b/doc/vmod-dbrw.3
index 6f1dba8..4760b6b 100644
--- a/doc/vmod-dbrw.3
+++ b/doc/vmod-dbrw.3
@@ -1,3 +1,3 @@
.\" This file is part of Vmod-dbrw -*- nroff -*-
-.\" Copyright (C) 2013-2017 Sergey Poznyakoff
+.\" Copyright (C) 2013-2018 Sergey Poznyakoff
.\"
@@ -15,3 +15,3 @@
.\" along with vmod-dbrw. If not, see <http://www.gnu.org/licenses/>.
-.TH VMOD-DBRW 1 "January 30, 2018" "VMOD-DBRW" "User Reference"
+.TH VMOD-DBRW 1 "December 8, 2018" "VMOD-DBRW" "User Reference"
.SH NAME
@@ -136,2 +136,28 @@ of the name.
.PP
+The special construct
+.sp
+.EX
+$(urlprefixes \fIPATH\fR)
+.EE
+.sp
+expands to a comma-separated list of all possible path prefixes in
+\fIPATH\fR. Each element in the list is quoted, so the result can
+safely be used in SQL statements. For example,
+.sp
+.EX
+$(urlprefixes "/local/user/login")
+.EE
+.sp
+produces
+.sp
+.EX
+ '/local/user/login','/local/user','/local'
+.EE
+.PP
+This statement is usually used in \fBIN\fR SQL constructs, e.g.
+.sp
+.EX
+SELECT * FROM table WHERE url IN ($(urlprefixes $url))
+.EE
+.PP
The expanded query is then sent to the database server. Handling
@@ -250,3 +276,5 @@ sub vcl_recv {
{"SELECT dest,pattern,value,flags FROM rewrite
- WHERE host='$host' and '$url' like url"});
+ WHERE host='$host'
+ AND url IN ($(urlprefixes $url))
+ ORDER BY LENGTH(dest),value DESC"});
set req.http.X-Redirect-To =
diff --git a/doc/vmod-dbrw.texi b/doc/vmod-dbrw.texi
index 40fc84d..31b15d1 100644
--- a/doc/vmod-dbrw.texi
+++ b/doc/vmod-dbrw.texi
@@ -498,4 +498,4 @@ ORDER BY weight
@noindent
-Furthermore, the @code{url} column can contain a SQL wildcard pattern,
-in which case the query will look like:
+Furthermore, the @code{url} column can contain a path prefix,
+which can be matched using the @code{IN} conditional:
@@ -506,3 +506,3 @@ FROM rewrite
WHERE host='$host'
-AND '$url' like $url
+AND url IN ($(urlprefixes $url))
ORDER BY weight
@@ -511,2 +511,12 @@ ORDER BY weight
+Notice the use of @samp{$(urlprefixes $url)}. This invokes the built-in
+@dfn{function} @code{urlprefixes}, which expands to a comma-separated
+list of properly quoted pathname prefixes, constructed from its
+argument. For example, if @samp{$url} is @samp{/local/user/local?a=1},
+then the expansion of @samp{$(urlprefixes $url)} is:
+
+@example
+'/local/user/local','/local/user','/local'
+@end example
+
@node Rewrite
diff --git a/src/vmod_dbrw.c b/src/vmod_dbrw.c
index d6785d7..63d4ea1 100644
--- a/src/vmod_dbrw.c
+++ b/src/vmod_dbrw.c
@@ -191,5 +191,7 @@ parse_flags(const char *arg, int *qdisp, int *flags, char status[])
rc = 1;
- } else
+ } else {
strncpy(status, ws.ws_wordv[i] + 9,
HTTP_STATUS_LEN);
+ status[HTTP_STATUS_LEN] = 0;
+ }
} else if (strncmp(ws.ws_wordv[i], "R=", 2) == 0) {
@@ -199,5 +201,7 @@ parse_flags(const char *arg, int *qdisp, int *flags, char status[])
rc = 1;
- } else
+ } else {
strncpy(status, ws.ws_wordv[i] + 2,
HTTP_STATUS_LEN);
+ status[HTTP_STATUS_LEN] = 0;
+ }
} else {
@@ -505,2 +509,101 @@ findmatch(VRT_CTX, struct dbrw_connection *conn, char **param)
+/* Build the diagnostic message "FUNC: MSG" in freshly malloc'ed memory
+   and store it in *RET.
+
+   Returns WRDSE_USERERR when the message was built, or WRDSE_NOSPACE
+   (with *RET set to NULL) if the allocation failed. */
+static int
+expand_error(char **ret, char const *func, char const *msg)
+{
+	static char const delim[] = ": ";
+	size_t funclen = strlen(func);
+	size_t delimlen = sizeof(delim) - 1;
+	size_t msglen = strlen(msg);
+	char *buf;
+
+	/* Room for both strings, the delimiter between them, and the
+	   terminating NUL.  The delimiter must be counted as well,
+	   otherwise the copy below writes past the end of the buffer. */
+	buf = malloc(funclen + delimlen + msglen + 1);
+	*ret = buf;
+	if (!buf)
+		return WRDSE_NOSPACE;
+
+	memcpy(buf, func, funclen);
+	memcpy(buf + funclen, delim, delimlen);
+	memcpy(buf + funclen + delimlen, msg, msglen + 1);
+	return WRDSE_USERERR;
+}
+
+/* Expander for $(urlprefixes PATH).
+
+   argv[0] is the function name and argv[1] its only argument.  The
+   argument is copied (through sql_escape if the backend provides an
+   escape method, with strdup otherwise), everything starting from the
+   first '?' is dropped, and *RET receives a malloc'ed comma-separated
+   list of single-quoted prefixes of the remaining path, longest first.
+
+   Returns WRDSE_OK on success, WRDSE_NOSPACE if memory is exhausted,
+   or the result of expand_error if the argument count is wrong. */
+static int
+expand_urlprefixes(struct dbrw_connection *cp, char **argv, char **ret)
+{
+	char *path;
+	char *out, *wp;
+	size_t pathlen, total, count, pos;
+
+	/* Exactly one argument is accepted */
+	if (!argv[1] || argv[2])
+		return expand_error(ret, argv[0], "bad arguments");
+
+	/* Work on a private copy of the argument */
+	if (cp->conf->backend->sql_escape)
+		path = sql_escape(cp, argv[1]);
+	else
+		path = strdup(argv[1]);
+	if (path == NULL)
+		return WRDSE_NOSPACE;
+
+	/* Drop the query part, if any */
+	pathlen = strcspn(path, "?");
+	path[pathlen] = 0;
+
+	/* The full path is always listed; each '/' found past the first
+	   character adds one more (shorter) prefix of length POS. */
+	total = pathlen;
+	count = 1;
+	for (pos = pathlen; pos > 0; pos--) {
+		if (path[pos] != '/')
+			continue;
+		total += pos;
+		count++;
+	}
+
+	/* Two quotes per member plus a comma between adjacent members */
+	total += count * 2 + count - 1;
+
+	out = malloc(total + 1);
+	if (out == NULL) {
+		free(path);
+		return WRDSE_NOSPACE;
+	}
+
+	/* Emit the prefixes, from the longest to the shortest */
+	wp = out;
+	pos = pathlen;
+	while (pos != 0) {
+		if (wp != out)
+			*wp++ = ',';
+		*wp++ = '\'';
+		memcpy(wp, path, pos);
+		wp += pos;
+		*wp++ = '\'';
+		/* Step back to the previous '/' (or to the start) */
+		do
+			pos--;
+		while (pos > 0 && path[pos] != '/');
+	}
+	*wp = 0;
+
+	free(path);
+	*ret = out;
+	return WRDSE_OK;
+}
+
+/* Functions that can be invoked from $(...) constructs in a query.
+   The table is terminated by an entry whose name is NULL. */
+static struct expcom {
+	/* Function name, as written in the query */
+	char *com;
+	/* Expander: takes the connection, the argument vector and the
+	   location where the result is to be stored */
+	int (*exp) (struct dbrw_connection *, char **, char **);
+} expcomtab[] = {
+	{ "urlprefixes", expand_urlprefixes },
+	{ NULL, NULL }
+};
+
+/* Command expansion callback (installed as ws_command in do_rewrite).
+
+   Looks up argv[0] in expcomtab and hands the call over to the
+   matching expander, passing CLOS as the connection.  CMD and LEN are
+   not used here.
+
+   Returns whatever the expander returns; for an unknown function name
+   returns the result of expand_error, which stores a diagnostic
+   message in *RET. */
+static int
+query_command_expand(char **ret, const char *cmd, size_t len, char **argv,
+		     void *clos)
+{
+	struct expcom *ec;
+
+	for (ec = expcomtab; ec->com; ec++) {
+		if (strcmp(ec->com, argv[0]) == 0)
+			return ec->exp(clos, argv, ret);
+	}
+
+	return expand_error(ret, argv[0], "unknown command");
+}
+
static char *
@@ -511,3 +614,4 @@ do_rewrite(VRT_CTX, struct dbrw_connection *cp, VCL_STRING arg)
char *res;
-
+ int wsflags;
+
if (sql_connect(cp) || cp->state != state_connected)
@@ -538,8 +642,15 @@ do_rewrite(VRT_CTX, struct dbrw_connection *cp, VCL_STRING arg)
- debug(cp->conf, 2, ("expanding query"));
+ debug(cp->conf, 2, ("expanding query {\"%s\"}", cp->conf->query));
ws.ws_env = (const char **)wsenv.ws_wordv;
- rc = wordsplit(cp->conf->query, &ws,
- WRDSF_NOCMD | WRDSF_QUOTE |
- WRDSF_NOSPLIT |
- WRDSF_ENV | WRDSF_UNDEF);
+ ws.ws_command = query_command_expand;
+ ws.ws_closure = cp;
+ wsflags = WRDSF_NOSPLIT | WRDSF_CLOSURE | WRDSF_ENV | WRDSF_UNDEF;
+
+ if (cp->conf->debug_level == 100) {
+ ws.ws_debug = dbrw_debug;
+ wsflags |= WRDSF_DEBUG | WRDSF_SHOWDBG;
+ }
+
+ rc = wordsplit(cp->conf->query, &ws, wsflags);
+
if (rc) {
diff --git a/src/wordsplit.c b/src/wordsplit.c
index f4740bf..bad59b1 100644
--- a/src/wordsplit.c
+++ b/src/wordsplit.c
@@ -1,3 +1,3 @@
/* wordsplit - a word splitter
- Copyright (C) 2009-2014 Sergey Poznyakoff
+ Copyright (C) 2009-2018 Sergey Poznyakoff
@@ -27,2 +27,4 @@
#include <stdarg.h>
+#include <pwd.h>
+#include <glob.h>
@@ -50,2 +52,8 @@
+/* True if C can start, resp. continue, a variable name.
+   The argument is evaluated more than once: do not pass expressions
+   with side effects. */
+#define ISVARBEG(c) (ISALPHA(c) || (c) == '_')
+#define ISVARCHR(c) (ISALNUM(c) || (c) == '_')
+
+/* True if delimiter nodes are to be kept in the node list of WSP */
+#define WSP_RETURN_DELIMS(wsp) \
+ (((wsp)->ws_flags & WRDSF_RETURN_DELIMS) || ((wsp)->ws_options & WRDSO_MAXWORDS))
+
#define ALLOC_INIT 128
@@ -56,3 +64,3 @@ _wsplt_alloc_die (struct wordsplit *wsp)
{
- wsp->ws_error (_("memory exhausted"));
+ wsp->ws_error ("%s", _("memory exhausted"));
abort ();
@@ -60,3 +68,3 @@ _wsplt_alloc_die (struct wordsplit *wsp)
-static void
+static void
_wsplt_error (const char *fmt, ...)
@@ -74,2 +82,11 @@ static void wordsplit_free_nodes (struct wordsplit *);
static int
+_wsplt_seterr (struct wordsplit *wsp, int ec)
+{
+  /* Store the code first, so that wordsplit_perror is called on a
+     structure that already carries it.  The code is also returned to
+     allow for `return _wsplt_seterr (...)'. */
+  wsp->ws_errno = ec;
+  if (!(wsp->ws_flags & WRDSF_SHOWERR))
+    return ec;
+  wordsplit_perror (wsp);
+  return ec;
+}
+
+static int
_wsplt_nomem (struct wordsplit *wsp)
@@ -88,2 +105,80 @@ _wsplt_nomem (struct wordsplit *wsp)
+static int wordsplit_run (const char *command, size_t length,
+ struct wordsplit *wsp,
+ int flags, int lvl);
+
+static int wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
+ int flags);
+static int wordsplit_process_list (struct wordsplit *wsp, size_t start);
+static int wordsplit_finish (struct wordsplit *wsp);
+
+/* Run a nested split of the LEN bytes at STR, using WSS as the work
+   structure and WSP as the parent it inherits its settings from.
+   FLAGS are the flags for the nested split.  If FINALIZE is non-zero,
+   wordsplit_finish is run on WSS and its node list is freed.
+   Returns 0 on success, or the error code of the failed step. */
+static int
+_wsplt_subsplit (struct wordsplit *wsp, struct wordsplit *wss,
+ char const *str, int len,
+ int flags, int finalize)
+{
+ int rc;
+
+ /* Delimiters and the diagnostic callbacks always come from the parent */
+ wss->ws_delim = wsp->ws_delim;
+ wss->ws_debug = wsp->ws_debug;
+ wss->ws_error = wsp->ws_error;
+ wss->ws_alloc_die = wsp->ws_alloc_die;
+
+ /* Unless variable expansion is disabled, inherit the environment,
+ the getvar callback and the related flags */
+ if (!(flags & WRDSF_NOVAR))
+ {
+ wss->ws_env = wsp->ws_env;
+ wss->ws_getvar = wsp->ws_getvar;
+ flags |= wsp->ws_flags & (WRDSF_ENV | WRDSF_ENV_KV | WRDSF_GETVAR);
+ }
+ /* Unless command expansion is disabled, inherit the command callback */
+ if (!(flags & WRDSF_NOCMD))
+ {
+ wss->ws_command = wsp->ws_command;
+ }
+
+ /* The closure is inherited if at least one of the two expansions
+ is enabled */
+ if ((flags & (WRDSF_NOVAR|WRDSF_NOCMD)) != (WRDSF_NOVAR|WRDSF_NOCMD))
+ {
+ wss->ws_closure = wsp->ws_closure;
+ flags |= wsp->ws_flags & WRDSF_CLOSURE;
+ }
+
+ wss->ws_options = wsp->ws_options;
+
+ /* Flag the fields copied above as set, and carry over the parent's
+ SHOWDBG, SHOWERR and OPTIONS flags */
+ flags |= WRDSF_DELIM
+ | WRDSF_ALLOC_DIE
+ | WRDSF_ERROR
+ | WRDSF_DEBUG
+ | (wsp->ws_flags & (WRDSF_SHOWDBG | WRDSF_SHOWERR | WRDSF_OPTIONS));
+
+ rc = wordsplit_init (wss, str, len, flags);
+ if (rc)
+ return rc;
+ /* The nested split runs one level deeper than its parent */
+ wss->ws_lvl = wsp->ws_lvl + 1;
+ rc = wordsplit_process_list (wss, 0);
+ if (rc)
+ {
+ wordsplit_free_nodes (wss);
+ return rc;
+ }
+ /* Without FINALIZE the node list of WSS is left in place */
+ if (finalize)
+ {
+ rc = wordsplit_finish (wss);
+ wordsplit_free_nodes (wss);
+ }
+ return rc;
+}
+
+/* Propagate the error state of the nested split WSS to its parent WSP.
+   A user error message previously stored in WSP is freed.  If WSS
+   holds a user error message, the pointer is handed over to WSP and
+   WSS is reset (WRDSE_EOF, no message). */
+static void
+_wsplt_seterr_sub (struct wordsplit *wsp, struct wordsplit *wss)
+{
+  int ec = wss->ws_errno;
+
+  if (wsp->ws_errno == WRDSE_USERERR)
+    free (wsp->ws_usererr);
+  wsp->ws_errno = ec;
+
+  if (ec != WRDSE_USERERR)
+    return;
+
+  /* Hand the message over to the parent */
+  wsp->ws_usererr = wss->ws_usererr;
+  wss->ws_errno = WRDSE_EOF;
+  wss->ws_usererr = NULL;
+}
+
static void
@@ -95,2 +190,3 @@ wordsplit_init0 (struct wordsplit *wsp)
wordsplit_free_words (wsp);
+ wordsplit_clearerr (wsp);
}
@@ -104,5 +200,6 @@ wordsplit_init0 (struct wordsplit *wsp)
wsp->ws_errno = 0;
- wsp->ws_head = wsp->ws_tail = NULL;
}
+/* Escape table installed for both WRDSX_WORD and WRDSX_QUOTE when
+   WRDSF_CESCAPES is set (see wordsplit_init).
+   NOTE(review): the contents look like pairs of characters, each
+   escape letter followed by the character it stands for -- confirm
+   against the code that consumes ws_escape. */
+char wordsplit_c_escape_tab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";
+
static int
@@ -118,10 +215,7 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
- if (!(wsp->ws_flags & WRDSF_NOVAR)
- && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR)))
+ if (!(wsp->ws_flags & WRDSF_NOVAR))
{
- errno = EINVAL;
- wsp->ws_errno = WRDSE_USAGE;
- if (wsp->ws_flags & WRDSF_SHOWERR)
- wordsplit_perror (wsp);
- return wsp->ws_errno;
+ /* These will be initialized on first variable assignment */
+ wsp->ws_envidx = wsp->ws_envsiz = 0;
+ wsp->ws_envbuf = NULL;
}
@@ -130,7 +224,8 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
{
- errno = EINVAL;
- wsp->ws_errno = WRDSE_NOSUPP;
- if (wsp->ws_flags & WRDSF_SHOWERR)
- wordsplit_perror (wsp);
- return wsp->ws_errno;
+ if (!wsp->ws_command)
+ {
+ _wsplt_seterr (wsp, WRDSE_USAGE);
+ errno = EINVAL;
+ return wsp->ws_errno;
+ }
}
@@ -165,6 +260,38 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
+ if (!(wsp->ws_flags & WRDSF_OPTIONS))
+ wsp->ws_options = 0;
+
+ if (wsp->ws_flags & WRDSF_ESCAPE)
+ {
+ if (!wsp->ws_escape[WRDSX_WORD])
+ wsp->ws_escape[WRDSX_WORD] = "";
+ if (!wsp->ws_escape[WRDSX_QUOTE])
+ wsp->ws_escape[WRDSX_QUOTE] = "";
+ }
+ else
+ {
+ if (wsp->ws_flags & WRDSF_CESCAPES)
+ {
+ wsp->ws_escape[WRDSX_WORD] = wordsplit_c_escape_tab;
+ wsp->ws_escape[WRDSX_QUOTE] = wordsplit_c_escape_tab;
+ wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD
+ | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD;
+ }
+ else
+ {
+ wsp->ws_escape[WRDSX_WORD] = "";
+ wsp->ws_escape[WRDSX_QUOTE] = "\\\\\"\"";
+ wsp->ws_options |= WRDSO_BSKEEP_QUOTE;
+ }
+ }
+
wsp->ws_endp = 0;
+ wsp->ws_wordi = 0;
+ if (wsp->ws_flags & WRDSF_REUSE)
+ wordsplit_free_nodes (wsp);
+ wsp->ws_head = wsp->ws_tail = NULL;
+
wordsplit_init0 (wsp);
-
+
return 0;
@@ -211,2 +338,3 @@ alloc_space (struct wordsplit *wsp, size_t count)
#define _WSNF_SEXP 0x20 /* is a sed expression */
+#define _WSNF_DELIM 0x40 /* node is a delimiter */
@@ -235,3 +363,3 @@ wsnode_flagstr (int flags)
{
- static char retbuf[6];
+ static char retbuf[7];
char *p = retbuf;
@@ -260,2 +388,6 @@ wsnode_flagstr (int flags)
*p++ = '-';
+ if (flags & _WSNF_DELIM)
+ *p++ = 'd';
+ else
+ *p++ = '-';
*p = 0;
@@ -340,2 +472,10 @@ wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node)
+/* Return the last node of the chain starting at P.  If P is NULL,
+   return NULL. */
+static struct wordsplit_node *
+wsnode_tail (struct wordsplit_node *p)
+{
+  if (!p)
+    return p;
+  for (; p->next; p = p->next)
+    ;
+  return p;
+}
+
static void
@@ -355,5 +495,6 @@ wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node,
{
+ struct wordsplit_node *tail = wsnode_tail (node);
node->prev = NULL;
- node->next = anchor;
- anchor->prev = node;
+ tail->next = anchor;
+ anchor->prev = tail;
wsp->ws_head = node;
@@ -364,2 +505,3 @@ wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node,
struct wordsplit_node *p;
+ struct wordsplit_node *tail = wsnode_tail (node);
@@ -367,6 +509,6 @@ wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node,
if (p)
- p->prev = node;
+ p->prev = tail;
else
- wsp->ws_tail = node;
- node->next = p;
+ wsp->ws_tail = tail;
+ tail->next = p;
node->prev = anchor;
@@ -417,6 +559,8 @@ wordsplit_dump_nodes (struct wordsplit *wsp)
if (p->flags & _WSNF_WORD)
- wsp->ws_debug ("%4d: %p: %#04x (%s):%s;",
+ wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%s;",
+ wsp->ws_lvl,
n, p, p->flags, wsnode_flagstr (p->flags), p->v.word);
else
- wsp->ws_debug ("%4d: %p: %#04x (%s):%.*s;",
+ wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%.*s;",
+ wsp->ws_lvl,
n, p, p->flags, wsnode_flagstr (p->flags),
@@ -435,2 +579,5 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
+ if (!(node->flags & _WSNF_JOIN))
+ return 0;
+
for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next)
@@ -459,2 +606,3 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
{
+ node->flags |= p->flags & _WSNF_QUOTE;
wsnode_remove (wsp, p);
@@ -478,2 +626,6 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
+static void wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
+ char *dst, const char *src,
+ size_t n);
+
static int
@@ -482,5 +634,2 @@ wsnode_quoteremoval (struct wordsplit *wsp)
struct wordsplit_node *p;
- void (*uqfn) (char *, const char *, size_t) =
- (wsp->ws_flags & WRDSF_CESCAPES) ?
- wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy;
@@ -493,5 +642,3 @@ wsnode_quoteremoval (struct wordsplit *wsp)
if (wsp->ws_flags & WRDSF_QUOTE)
- {
- unquote = !(p->flags & _WSNF_NOEXPAND);
- }
+ unquote = !(p->flags & _WSNF_NOEXPAND);
else
@@ -512,7 +659,4 @@ wsnode_quoteremoval (struct wordsplit *wsp)
- if (wsp->ws_flags & WRDSF_ESCAPE)
- wordsplit_general_unquote_copy (p->v.word, str, slen,
- wsp->ws_escape);
- else
- uqfn (p->v.word, str, slen);
+ wordsplit_string_unquote_copy (wsp, p->flags & _WSNF_QUOTE,
+ p->v.word, str, slen);
}
@@ -537,2 +681,21 @@ wsnode_coalesce (struct wordsplit *wsp)
static int
+wsnode_tail_coalesce (struct wordsplit *wsp, struct wordsplit_node *p)
+{
+  struct wordsplit_node *np;
+
+  /* A single node needs no coalescing */
+  if (!p->next)
+    return 0;
+
+  /* Mark every node but the last one as joined with its successor,
+     then let coalesce_segment do the merging starting at P. */
+  for (np = p; np && np->next; np = np->next)
+    np->flags |= _WSNF_JOIN;
+
+  return coalesce_segment (wsp, p) ? 1 : 0;
+}
+
+static size_t skip_delim (struct wordsplit *wsp);
+
+static int
wordsplit_finish (struct wordsplit *wsp)
@@ -541,15 +704,133 @@ wordsplit_finish (struct wordsplit *wsp)
size_t n;
+ int delim;
- n = 0;
+ /* Postprocess delimiters. It would be rather simple, if it weren't for
+ the incremental operation.
- for (p = wsp->ws_head; p; p = p->next)
- n++;
+ Nodes of type _WSNF_DELIM get inserted to the node list if either
+ WRDSF_RETURN_DELIMS flag or WRDSO_MAXWORDS option is set.
+
+ The following cases should be distinguished:
+
+ 1. If both WRDSF_SQUEEZE_DELIMS and WRDSF_RETURN_DELIMS are set, compress
+ any runs of similar delimiter nodes to a single node. The nodes are
+ 'similar' if they point to the same delimiter character.
+
+ If WRDSO_MAXWORDS option is set, stop compressing when
+ ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into
+ a single last node.
+
+ 2. If WRDSO_MAXWORDS option is set, but WRDSF_RETURN_DELIMS is not,
+ remove any delimiter nodes. Stop operation when
+ ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into
+ a single last node.
+
+ 3. If incremental operation is in progress, restart the loop any time
+ a delimiter node is about to be returned, unless WRDSF_RETURN_DELIMS
+ is set.
+ */
+ again:
+ delim = 0; /* Delimiter being processed (if any) */
+ n = 0; /* Number of words processed so far */
+ p = wsp->ws_head; /* Current node */
+
+ while (p)
+ {
+ struct wordsplit_node *next = p->next;
+ if (p->flags & _WSNF_DELIM)
+ {
+ if (wsp->ws_flags & WRDSF_RETURN_DELIMS)
+ {
+ if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS)
+ {
+ char const *s = wsnode_ptr (wsp, p);
+ if (delim)
+ {
+ if (delim == *s)
+ {
+ wsnode_remove (wsp, p);
+ p = next;
+ continue;
+ }
+ else
+ {
+ delim = 0;
+ n++; /* Count this node; it will be returned */
+ }
+ }
+ else
+ {
+ delim = *s;
+ p = next;
+ continue;
+ }
+ }
+ }
+ else if (wsp->ws_options & WRDSO_MAXWORDS)
+ {
+ wsnode_remove (wsp, p);
+ p = next;
+ continue;
+ }
+ }
+ else
+ {
+ if (delim)
+ {
+ /* Last node was a delimiter or a compressed run of delimiters;
+ Count it, and clear the delimiter marker */
+ n++;
+ delim = 0;
+ }
+ if (wsp->ws_options & WRDSO_MAXWORDS)
+ {
+ if (wsp->ws_wordi + n + 1 == wsp->ws_maxwords)
+ break;
+ }
+ }
+ n++;
+ if (wsp->ws_flags & WRDSF_INCREMENTAL)
+ p = NULL; /* Break the loop */
+ else
+ p = next;
+ }
+
+ if (p)
+ {
+ /* We're here if WRDSO_MAXWORDS is in effect and wsp->ws_maxwords
+ words have already been collected. Reconstruct a single final
+ node from the remaining nodes. */
+ if (wsnode_tail_coalesce (wsp, p))
+ return wsp->ws_errno;
+ n++;
+ }
+
+  if (n == 0 && (wsp->ws_flags & WRDSF_INCREMENTAL))
+    {
+      /* The loop above has eliminated all nodes.  Restart the
+         processing, if there's any input left. */
+      if (wsp->ws_endp < wsp->ws_len)
+        {
+          int rc;
+          if (wsp->ws_flags & WRDSF_SHOWDBG)
+            wsp->ws_debug (_("Restarting"));
+          rc = wordsplit_process_list (wsp, skip_delim (wsp));
+          if (rc)
+            return rc;
+        }
+      else
+        {
+          /* No input left.  The code goes to ws_errno; ws_error is
+             the diagnostics callback and must not be overwritten. */
+          wsp->ws_errno = WRDSE_EOF;
+          return WRDSE_EOF;
+        }
+      goto again;
+    }
if (alloc_space (wsp, n + 1))
- return 1;
+ return wsp->ws_errno;
- for (p = wsp->ws_head; p; p = p->next)
+ while (wsp->ws_head)
{
- const char *str = wsnode_ptr (wsp, p);
- size_t slen = wsnode_len (p);
+ const char *str = wsnode_ptr (wsp, wsp->ws_head);
+ size_t slen = wsnode_len (wsp->ws_head);
char *newstr = malloc (slen + 1);
@@ -565,4 +846,9 @@ wordsplit_finish (struct wordsplit *wsp)
+ wsnode_remove (wsp, wsp->ws_head);
+
wsp->ws_wordc++;
+ wsp->ws_wordi++;
+ if (wsp->ws_flags & WRDSF_INCREMENTAL)
+ break;
}
@@ -572,3 +858,31 @@ wordsplit_finish (struct wordsplit *wsp)
+/* Append copies of the ARGC strings from ARGV to the word array of
+   WSP and terminate the array with NULL.
+   Returns 0 on success.  If alloc_space fails, its return value is
+   passed through.  If a string cannot be copied, the copies made so
+   far are freed and the result of _wsplt_nomem is returned. */
+int
+wordsplit_append (wordsplit_t *wsp, int argc, char **argv)
+{
+  size_t k;
+  int status = alloc_space (wsp, wsp->ws_wordc + argc + 1);
+
+  if (status)
+    return status;
+
+  for (k = 0; k < argc; k++)
+    {
+      char *copy = strdup (argv[k]);
+
+      if (copy)
+        {
+          wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + k] = copy;
+          continue;
+        }
+
+      /* Out of memory: undo what has been appended so far */
+      for (; k > 0; k--)
+        {
+          free (wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + k - 1]);
+          wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + k - 1] = NULL;
+        }
+      return _wsplt_nomem (wsp);
+    }
+
+  wsp->ws_wordc += k;
+  wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
+  return 0;
+}
+
/* Variable expansion */
@@ -609,6 +923,6 @@ node_split_prefix (struct wordsplit *wsp,
static int
-find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff)
+find_closing_paren (const char *str, size_t i, size_t len, size_t *poff,
+ char const *paren)
{
- enum
- { st_init, st_squote, st_dquote } state = st_init;
+ enum { st_init, st_squote, st_dquote } state = st_init;
size_t level = 1;
@@ -622,14 +936,19 @@ find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff)
{
- case '{':
- level++;
- break;
-
- case '}':
- if (--level == 0)
+ default:
+ if (str[i] == paren[0])
+ {
+ level++;
+ break;
+ }
+ else if (str[i] == paren[1])
{
- *poff = i;
- return 0;
+ if (--level == 0)
+ {
+ *poff = i;
+ return 0;
+ }
+ break;
}
break;
-
+
case '"':
@@ -660,4 +979,5 @@ find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff)
-static const char *
-wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len)
+static int
+wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len,
+ char const **ret)
{
@@ -666,3 +986,3 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len)
if (!(wsp->ws_flags & WRDSF_ENV))
- return NULL;
+ return WRDSE_UNDEF;
@@ -675,3 +995,6 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len)
if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0)
- return wsp->ws_env[i + 1];
+ {
+ *ret = wsp->ws_env[i + 1];
+ return WRDSE_OK;
+ }
/* Skip the value. Break the loop if it is NULL. */
@@ -682,3 +1005,3 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len)
}
- else
+ else if (wsp->ws_env)
{
@@ -694,6 +1017,113 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len)
if (j == len && var[j] == '=')
- return var + j + 1;
+ {
+ *ret = var + j + 1;
+ return WRDSE_OK;
+ }
}
}
- return NULL;
+ return WRDSE_UNDEF;
+}
+
+/* Assign VALUE to the variable whose name is given by the first
+   NAMELEN bytes of NAME, by appending a new entry to the environment
+   of WSP.
+
+   The environment is kept in ws_envbuf, which is allocated on first
+   use: if WRDSF_ENV is set, it starts as a copy of the entries of
+   ws_env; otherwise an empty array of WORDSPLIT_ENV_INIT slots is
+   created and WRDSF_ENV gets set.  ws_env is redirected to ws_envbuf.
+   With WRDSF_ENV_KV the entry occupies two slots (name, value),
+   otherwise one slot holding "NAME=VALUE".
+
+   ws_envidx always indexes the terminating NULL, so that consecutive
+   assignments form a single NULL-terminated array.
+
+   Returns WRDSE_OK on success, or the result of _wsplt_nomem if memory
+   is exhausted. */
+static int
+wsplt_assign_var (struct wordsplit *wsp, const char *name, size_t namelen,
+                  char *value)
+{
+  int n = (wsp->ws_flags & WRDSF_ENV_KV) ? 2 : 1;
+  char *v;
+
+  /* Make sure there is room for N new slots plus the terminating NULL */
+  if (wsp->ws_envidx + n >= wsp->ws_envsiz)
+    {
+      size_t sz;
+      char **newenv;
+
+      if (!wsp->ws_envbuf)
+        {
+          if (wsp->ws_flags & WRDSF_ENV)
+            {
+              size_t i = 0, j;
+
+              /* Count the entries in the existing environment */
+              if (wsp->ws_env)
+                {
+                  for (; wsp->ws_env[i]; i++)
+                    ;
+                }
+
+              sz = i + n + 1;
+
+              newenv = calloc (sz, sizeof (newenv[0]));
+              if (!newenv)
+                return _wsplt_nomem (wsp);
+
+              for (j = 0; j < i; j++)
+                {
+                  newenv[j] = strdup (wsp->ws_env[j]);
+                  if (!newenv[j])
+                    {
+                      /* Release the copies made so far (there may be
+                         none) and the array itself */
+                      while (j > 0)
+                        free (newenv[--j]);
+                      free (newenv);
+                      return _wsplt_nomem (wsp);
+                    }
+                }
+              newenv[j] = NULL;
+
+              wsp->ws_envbuf = newenv;
+              wsp->ws_envidx = i;
+              wsp->ws_envsiz = sz;
+              wsp->ws_env = (const char**) wsp->ws_envbuf;
+            }
+          else
+            {
+              newenv = calloc (WORDSPLIT_ENV_INIT, sizeof (newenv[0]));
+              if (!newenv)
+                return _wsplt_nomem (wsp);
+              wsp->ws_envbuf = newenv;
+              wsp->ws_envidx = 0;
+              wsp->ws_envsiz = WORDSPLIT_ENV_INIT;
+              wsp->ws_env = (const char**) wsp->ws_envbuf;
+              wsp->ws_flags |= WRDSF_ENV;
+            }
+        }
+      else
+        {
+          /* Record the new size only after realloc has succeeded, so
+             that ws_envsiz never exceeds the actual allocation */
+          sz = wsp->ws_envsiz * 2;
+          newenv = realloc (wsp->ws_envbuf, sz * sizeof (wsp->ws_envbuf[0]));
+          if (!newenv)
+            return _wsplt_nomem (wsp);
+          wsp->ws_envbuf = newenv;
+          wsp->ws_envsiz = sz;
+          wsp->ws_env = (const char**) wsp->ws_envbuf;
+        }
+    }
+
+  if (wsp->ws_flags & WRDSF_ENV_KV)
+    {
+      /* A key-value pair environment */
+      char *p = malloc (namelen + 1);
+      if (!p)
+        return _wsplt_nomem (wsp);
+      memcpy (p, name, namelen);
+      p[namelen] = 0;
+
+      v = strdup (value);
+      if (!v)
+        {
+          free (p);
+          return _wsplt_nomem (wsp);
+        }
+      wsp->ws_env[wsp->ws_envidx++] = p;
+      wsp->ws_env[wsp->ws_envidx++] = v;
+    }
+  else
+    {
+      /* A "NAME=VALUE" environment: name, '=', value and the NUL */
+      v = malloc (namelen + strlen (value) + 2);
+      if (!v)
+        return _wsplt_nomem (wsp);
+      memcpy (v, name, namelen);
+      v[namelen++] = '=';
+      strcpy (v + namelen, value);
+      wsp->ws_env[wsp->ws_envidx++] = v;
+    }
+  /* Terminate the array without stepping over the terminator: the
+     next assignment must overwrite it, or the new entry would end up
+     behind the NULL and never be found. */
+  wsp->ws_env[wsp->ws_envidx] = NULL;
+  return WRDSE_OK;
+}
@@ -706,3 +1136,3 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
const char *defstr = NULL;
- const char *value;
+ char *value;
const char *vptr;
@@ -710,7 +1140,9 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
const char *start = str - 1;
-
- if (ISALPHA (str[0]) || str[0] == '_')
+ int rc;
+ struct wordsplit ws;
+
+ if (ISVARBEG (str[0]))
{
for (i = 1; i < len; i++)
- if (!(ISALNUM (str[i]) || str[i] == '_'))
+ if (!ISVARCHR (str[i]))
break;
@@ -723,26 +1155,32 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
for (i = 1; i < len; i++)
- if (str[i] == '}' || str[i] == ':')
- break;
- if (str[i] == ':')
{
- size_t j;
-
- defstr = str + i + 1;
- if (find_closing_cbrace (str, i + 1, len, &j))
+ if (str[i] == ':')
{
- wsp->ws_errno = WRDSE_CBRACE;
- return 1;
+ size_t j;
+
+ defstr = str + i + 1;
+ if (find_closing_paren (str, i + 1, len, &j, "{}"))
+ return _wsplt_seterr (wsp, WRDSE_CBRACE);
+ *pend = str + j;
+ break;
+ }
+ else if (str[i] == '}')
+ {
+ defstr = NULL;
+ *pend = str + i;
+ break;
+ }
+ else if (strchr ("-+?=", str[i]))
+ {
+ size_t j;
+
+ defstr = str + i;
+ if (find_closing_paren (str, i, len, &j, "{}"))
+ return _wsplt_seterr (wsp, WRDSE_CBRACE);
+ *pend = str + j;
+ break;
}
- *pend = str + j;
- }
- else if (str[i] == '}')
- {
- defstr = NULL;
- *pend = str + i;
- }
- else
- {
- wsp->ws_errno = WRDSE_CBRACE;
- return 1;
}
+ if (i == len)
+ return _wsplt_seterr (wsp, WRDSE_CBRACE);
}
@@ -770,28 +1208,140 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
- vptr = wordsplit_find_env (wsp, str, i);
- if (vptr)
+ if (defstr && strchr("-+?=", defstr[0]) == 0)
{
- value = strdup (vptr);
- if (!value)
- return _wsplt_nomem (wsp);
+ rc = WRDSE_UNDEF;
+ defstr = NULL;
}
- else if (wsp->ws_flags & WRDSF_GETVAR)
- value = wsp->ws_getvar (str, i, wsp->ws_closure);
- else if (wsp->ws_flags & WRDSF_UNDEF)
+ else
{
- wsp->ws_errno = WRDSE_UNDEF;
- if (wsp->ws_flags & WRDSF_SHOWERR)
- wordsplit_perror (wsp);
- return 1;
+ rc = wordsplit_find_env (wsp, str, i, &vptr);
+ if (rc == WRDSE_OK)
+ {
+ if (vptr)
+ {
+ value = strdup (vptr);
+ if (!value)
+ rc = WRDSE_NOSPACE;
+ }
+ else
+ rc = WRDSE_UNDEF;
+ }
+ else if (wsp->ws_flags & WRDSF_GETVAR)
+ rc = wsp->ws_getvar (&value, str, i, wsp->ws_closure);
+ else