aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org>2018-12-08 12:03:40 +0200
committerSergey Poznyakoff <gray@gnu.org>2018-12-08 12:03:40 +0200
commit602f4d93070ac0e762e0cbe3ef72ba792f9c4811 (patch)
tree8047168c6b23de38973b494b6ec794a81faf3576
parent2a684f1cdd7723c2ded277ea2c7e66227b6f3ae1 (diff)
downloadvmod-dbrw-602f4d93070ac0e762e0cbe3ef72ba792f9c4811.tar.gz
vmod-dbrw-602f4d93070ac0e762e0cbe3ef72ba792f9c4811.tar.bz2
Implement the $(urlprefixes) built-in function.
* NEWS: Update. * README: Update. * configure.ac: Version 2.2.91 * doc/vmod-dbrw.3: Document the use of $(urlprefixes) built-in * doc/vmod-dbrw.texi: Likewise. * src/vmod_dbrw.c (parse_flags): Make sure status string is null-terminated. (do_rewrite): Expand built-in functions in $(). Support urlprefixes. On debug_level=100, produce detailed trace of expansions. * src/wordsplit.c: Pull from grecs commit 9097d529. * src/wordsplit.h: Likewise. * tests/initdb.at (rewrite): Change the url column. * tests/rewrite01.at: Use $(urlprefixes) in the SQL templates. * tests/rewrite02.at: Likewise. * tests/rewrite03.at: Likewise. * tests/rewrite04.at: Likewise. * tests/rewrite05.at: Likewise. * tests/rewrite06.at: Likewise.
-rw-r--r--NEWS31
-rw-r--r--README3
-rw-r--r--configure.ac2
-rw-r--r--doc/vmod-dbrw.334
-rw-r--r--doc/vmod-dbrw.texi16
-rw-r--r--src/vmod_dbrw.c127
-rw-r--r--src/wordsplit.c1646
-rw-r--r--src/wordsplit.h214
-rw-r--r--tests/initdb.at10
-rw-r--r--tests/rewrite01.at5
-rw-r--r--tests/rewrite02.at5
-rw-r--r--tests/rewrite03.at5
-rw-r--r--tests/rewrite04.at5
-rw-r--r--tests/rewrite05.at5
-rw-r--r--tests/rewrite06.at5
15 files changed, 1664 insertions, 449 deletions
diff --git a/NEWS b/NEWS
index 913b0f9..c700564 100644
--- a/NEWS
+++ b/NEWS
@@ -1,12 +1,12 @@
-vmod-dbrw -- history of user-visible changes. 2018-01-30
+vmod-dbrw -- history of user-visible changes. 2018-12-08
See the end of file for copying conditions.
Please send vmod-dbrw bug reports to <gray@gnu.org>
-Version 2.2.90 (Git)
+Version 2.2.91 (Git)
* SQL idle timeout
For MySQL backend, the default connection idle timeout is set equal to
the value of the MySQL variable 'wait_timeout'. For Postgres, default
idle timeout is not yet implemented.
@@ -15,12 +15,39 @@ Idle timeout can be configured using the timeout configuration option,
e.g.:
dbrw.config("mysql", "database=dbrw;user=proxy;timeout=600",
{"select dest,pattern,value,flags from rewrite where
locate(url,'$url') = 1 order by weight asc;"});
+* The $() functions in SQL templates
+
+The SQL templates support the use of $() constructs for invoking
+built-in functions. So far one function is implemented:
+
+ $(urlprefixes PATH)
+
+It expands to a comma-separated list of properly quoted pathname
+prefixes, constructed from its argument. An optional query part is
+stripped off the argument prior to expansion. For example:
+
+ $(urlprefixes "/local/user/local?a=1")
+
+expands to:
+
+ '/local/user/local','/local/user','/local'
+
+This construct is intended for use in SQL IN conditionals, for
+example:
+
+ SELECT dest,pattern,value,flags
+ FROM rewrite
+ WHERE host='$host'
+ AND url IN ($(urlprefixes $url))
+ ORDER BY length(dest),value,weight DESC
+
+
Version 2.2, 2017-08-10
* Support for Varnish 5.1
diff --git a/README b/README
index be4e408..354099f 100644
--- a/README
+++ b/README
@@ -1,8 +1,7 @@
Vmod-dbrw README
-Copyright (C) 2013-2017 Sergey Poznyakoff
See the end of file for copying conditions.
* Introduction
This file contains brief information about configuring, testing
and running vmod-dbrw. It is *not* intended as a replacement
@@ -220,13 +219,13 @@ This way you won't need to supply them to `make check'.
Send bug reports and suggestions to <gray@gnu.org>
* Copyright information:
-Copyright (C) 2013-2017 Sergey Poznyakoff
+Copyright (C) 2013-2018 Sergey Poznyakoff
Permission is granted to anyone to make or distribute verbatim copies
of this document as received, in any medium, provided that the
copyright notice and this permission notice are preserved,
thus giving the recipient permission to redistribute in turn.
diff --git a/configure.ac b/configure.ac
index 7a1272c..1212a37 100644
--- a/configure.ac
+++ b/configure.ac
@@ -11,13 +11,13 @@
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with vmod-dbrw. If not, see <http://www.gnu.org/licenses/>.
AC_PREREQ(2.69)
-AC_INIT([vmod-dbrw], 2.2.90, [gray@gnu.org])
+AC_INIT([vmod-dbrw], 2.2.91, [gray@gnu.org])
AC_CONFIG_AUX_DIR([build-aux])
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_SRCDIR(src/vmod_dbrw.vcc)
AM_CONFIG_HEADER(config.h)
AC_SUBST([AC_VMOD_BASENAME],[dbrw])
diff --git a/doc/vmod-dbrw.3 b/doc/vmod-dbrw.3
index 6f1dba8..4760b6b 100644
--- a/doc/vmod-dbrw.3
+++ b/doc/vmod-dbrw.3
@@ -1,8 +1,8 @@
.\" This file is part of Vmod-dbrw -*- nroff -*-
-.\" Copyright (C) 2013-2017 Sergey Poznyakoff
+.\" Copyright (C) 2013-2018 Sergey Poznyakoff
.\"
.\" Vmod-dbrw is free software; you can redistribute it and/or modify
.\" it under the terms of the GNU General Public License as published by
.\" the Free Software Foundation; either version 3, or (at your option)
.\" any later version.
.\"
@@ -10,13 +10,13 @@
.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
.\" GNU General Public License for more details.
.\"
.\" You should have received a copy of the GNU General Public License
.\" along with vmod-dbrw. If not, see <http://www.gnu.org/licenses/>.
-.TH VMOD-DBRW 1 "January 30, 2018" "VMOD-DBRW" "User Reference"
+.TH VMOD-DBRW 1 "December 8, 2018" "VMOD-DBRW" "User Reference"
.SH NAME
vmod-dbrw \- Database-driven rewrite rules for Varnish Cache
.SH SYNOPSIS
.B import dbrw;
.PP
.BI "VOID dbrw.config(STRING " dbtype ", STRING " params ", STRING " query ");"
@@ -131,12 +131,38 @@ construct (a \fBvariable reference\fR) with the corresponding
\fIVALUE\fR from its argument. Similarly to the shell syntax, the
variable reference can be written as \fB${\fINAME\fB}\fR. This
form can be used in contexts where the variable name is immediately
followed by another letter, to prevent it from being counted as a part
of the name.
.PP
+The special construct
+.sp
+.EX
+$(urlprefixes \fIPATH\fR)
+.EE
+.sp
+expands to a comma-separated list of all possible path prefixes in
+\fIPATH\fR. Each element in the list is quoted, so the result can
+safely be used in SQL statements. For example,
+.sp
+.EX
+$(urlprefixes "/local/user/login")
+.EE
+.sp
+produces
+.sp
+.EX
+ '/local/user/login','/local/user','/local'
+.EE
+.PP
+This statement is usually used in \fBIN\fR SQL constructs, e.g.
+.sp
+.EX
+SELECT * FROM table WHERE url IN ($(urlprefixes $url))
+.EE
+.PP
The expanded query is then sent to the database server. Handling
of the return value depends on the number of fields it contains.
.SS Strict matches
If the returned set consists of one or two columns, only the
first tuple is used and the value of its first column is returned.
The second column (if present) is ignored.
@@ -245,13 +271,15 @@ The VCL:
.EX
sub vcl_recv {
# It is supposed that the url column contains an SQL-style
# wildcard pattern.
dbrw.config("mysql", "database=varnish;user=varnish;debug=10",
{"SELECT dest,pattern,value,flags FROM rewrite
- WHERE host='$host' and '$url' like url"});
+ WHERE host='$host'
+ AND url IN ($(urlprefixes $url))
+ ORDER BY LENGTH(dest),value DESC"});
set req.http.X-Redirect-To =
dbrw.rewrite("host=" + req.http.Host + ";" +
"url=" + req.url);
if (req.http.X-Redirect-To != "") {
return(synth(750, "Redirect"));
}
diff --git a/doc/vmod-dbrw.texi b/doc/vmod-dbrw.texi
index 40fc84d..31b15d1 100644
--- a/doc/vmod-dbrw.texi
+++ b/doc/vmod-dbrw.texi
@@ -493,25 +493,35 @@ WHERE host='$host'
AND LOCATE(url,'$url')==1
ORDER BY weight
@end group
@end example
@noindent
-Furthermore, the @code{url} column can contain a SQL wildcard pattern,
-in which case the query will look like:
+Furthermore, the @code{url} column can contain a path prefix,
+which can be matched using the @code{IN} conditional:
@example
@group
SELECT dest,pattern,value,flags
FROM rewrite
WHERE host='$host'
-AND '$url' like $url
+AND url IN ($(urlprefixes $url))
ORDER BY weight
@end group
@end example
+Notice the use of @samp{$(urlprefixes $url)}. This invokes the built-in
+@dfn{function} @code{urlprefixes}, which expands to a comma-separated
+list of properly quoted pathname prefixes, constructed from its
+argument. An optional query part is stripped off the argument prior
+to expansion. For example, if @samp{$url} is @samp{/local/user/local?a=1},
+then the expansion of @samp{$(urlprefixes $url)} is:
+
+@example
+'/local/user/local','/local/user','/local'
+@end example
+
@node Rewrite
@chapter The @code{rewrite} Function
@deftypefn {function} string rewrite (string @var{args})
This function is the working horse of the module. It rewrites its
argument using the database configured in the previous call to
diff --git a/src/vmod_dbrw.c b/src/vmod_dbrw.c
index d6785d7..63d4ea1 100644
--- a/src/vmod_dbrw.c
+++ b/src/vmod_dbrw.c
@@ -186,23 +186,27 @@ parse_flags(const char *arg, int *qdisp, int *flags, char status[])
*qdisp = QDISP_DISCARD;
else if (strncmp(ws.ws_wordv[i], "redirect=", 9) == 0) {
if (!is_http_status(ws.ws_wordv[i] + 9)) {
dbrw_error("invalid status code: %s",
ws.ws_wordv[i] + 9);
rc = 1;
- } else
+ } else {
strncpy(status, ws.ws_wordv[i] + 9,
HTTP_STATUS_LEN);
+ status[HTTP_STATUS_LEN] = 0;
+ }
} else if (strncmp(ws.ws_wordv[i], "R=", 2) == 0) {
if (!is_http_status(ws.ws_wordv[i] + 2)) {
dbrw_error("invalid status code: %s",
ws.ws_wordv[i] + 2);
rc = 1;
- } else
+ } else {
strncpy(status, ws.ws_wordv[i] + 2,
HTTP_STATUS_LEN);
+ status[HTTP_STATUS_LEN] = 0;
+ }
} else {
dbrw_error("unrecognized flag: %s", ws.ws_wordv[i]);
rc = 1;
}
}
@@ -500,19 +504,119 @@ findmatch(VRT_CTX, struct dbrw_connection *conn, char **param)
if (wsflags & WRDSF_REUSE)
wordsplit_free(&ws);
return res;
}
+/*
+ * Store in *RET a malloc'ed diagnostic string of the form "FUNC: MSG".
+ *
+ * Returns WRDSE_USERERR if the string was built, or WRDSE_NOSPACE (with
+ * *RET set to NULL) if the memory for it could not be allocated.
+ */
+static int
+expand_error(char **ret, char const *func, char const *msg)
+{
+	static char const delim[] = ": ";
+	size_t flen = strlen(func);
+	size_t dlen = sizeof(delim) - 1;
+	size_t mlen = strlen(msg);
+	char *buf;
+
+	/* Reserve room for all three parts plus the terminating null.
+	   Leaving the delimiter out of the sum would make the copies
+	   below write past the end of the buffer. */
+	buf = malloc(flen + dlen + mlen + 1);
+	*ret = buf;
+	if (!buf)
+		return WRDSE_NOSPACE;
+
+	memcpy(buf, func, flen);
+	memcpy(buf + flen, delim, dlen);
+	memcpy(buf + flen + dlen, msg, mlen + 1); /* copies the null too */
+	return WRDSE_USERERR;
+}
+
+/*
+ * Implementation of the $(urlprefixes PATH) built-in.
+ *
+ * ARGV[0] is the function name and ARGV[1] its only argument; any other
+ * number of arguments is reported via expand_error.  The argument is
+ * copied (through sql_escape, if the backend provides an sql_escape
+ * member, otherwise with strdup), cut at the first '?', and expanded
+ * to a comma-separated list of single-quoted prefixes, longest first.
+ * For an argument of /local/user/local?a=1 the result is
+ *   '/local/user/local','/local/user','/local'
+ * If nothing precedes the '?', the result is an empty string.
+ *
+ * On success the malloc'ed result is stored in *RET and WRDSE_OK is
+ * returned.  WRDSE_NOSPACE is returned if the copy of the argument
+ * could not be obtained or the result could not be allocated.
+ */
+static int
+expand_urlprefixes(struct dbrw_connection *cp, char **argv, char **ret)
+{
+ char *arg;
+ size_t n, len, i, j;
+ char *q, *res;
+
+ /* Exactly one argument is required */
+ if (argv[1] == NULL || argv[2] != NULL)
+ return expand_error(ret, argv[0], "bad arguments");
+
+ /* Create a copy of the argument */
+ if (cp->conf->backend->sql_escape) {
+ arg = sql_escape(cp, argv[1]);
+ } else {
+ arg = strdup(argv[1]);
+ }
+ if (!arg)
+ return WRDSE_NOSPACE;
+
+ /* Cut off the query part, if any.  J remembers the length of the
+    remaining path for the formatting loop below. */
+ i = j = strcspn(arg, "?");
+ arg[i] = 0;
+
+ /* Compute the resulting length: the full path, plus one shorter
+    prefix of length I for each '/' found at an offset I > 0.
+    N counts the members of the list. */
+ len = i;
+ n = 1;
+ for (; i > 0; i--) {
+ if (arg[i] == '/') {
+ len += i;
+ n++;
+ }
+ }
+
+ /* Count the two quotes around each member and the N - 1 commas
+    between members */
+ len += n * 2 + n - 1;
+
+ /* Allocate the result */
+ res = malloc(len + 1);
+ if (!res) {
+ free(arg);
+ return WRDSE_NOSPACE;
+ }
+
+ /* Format the result: emit the first I bytes of ARG in quotes, then
+    move I back to the previous '/' (or to 0, which ends the loop). */
+ q = res;
+ i = j;
+ while (i) {
+ if (q > res)
+ *q++ = ',';
+ *q++ = '\'';
+ memcpy(q, arg, i);
+ q += i;
+ *q++ = '\'';
+ i--;
+ while (i > 0 && arg[i] != '/')
+ i--;
+ }
+ *q = 0;
+ *ret = res;
+ free(arg);
+
+ return WRDSE_OK;
+}
+
+/*
+ * Table of built-in functions that can be invoked from $() constructs.
+ * It is scanned by query_command_expand and terminated by an entry
+ * whose com member is NULL.
+ */
+static struct expcom {
+	/* Function name, compared against argv[0] */
+	char *com;
+	/* Handler; called with the closure, the argument vector and the
+	   address where the expansion is to be stored */
+	int (*exp) (struct dbrw_connection *cp, char **argv, char **ret);
+} expcomtab[] = {
+	{ "urlprefixes", expand_urlprefixes },
+	{ NULL, NULL }
+};
+
+/*
+ * Command expansion callback; do_rewrite installs it as ws_command.
+ *
+ * Looks up ARGV[0] in expcomtab and hands the expansion over to the
+ * matching handler, passing CLOS as its first argument and RET as the
+ * place to store the result.  If no entry matches, an "unknown command"
+ * diagnostic is produced via expand_error.  CMD and LEN are not used.
+ *
+ * Returns whatever the handler or expand_error returns.
+ */
+static int
+query_command_expand(char **ret, const char *cmd, size_t len, char **argv,
+		     void *clos)
+{
+	struct expcom *ec;
+
+	for (ec = expcomtab; ec->com; ec++) {
+		if (strcmp(ec->com, argv[0]) == 0)
+			return ec->exp(clos, argv, ret);
+	}
+
+	return expand_error(ret, argv[0], "unknown command");
+}
+
static char *
do_rewrite(VRT_CTX, struct dbrw_connection *cp, VCL_STRING arg)
{
struct wordsplit ws, wsenv;
int i, rc;
char *res;
-
+ int wsflags;
+
if (sql_connect(cp) || cp->state != state_connected)
return NULL;
debug(cp->conf, 2, ("vmod_rewrite: splitting arg"));
wsenv.ws_delim = ";";
if (wordsplit(arg, &wsenv, WRDSF_NOVAR|WRDSF_NOCMD|WRDSF_DELIM)) {
@@ -533,18 +637,25 @@ do_rewrite(VRT_CTX, struct dbrw_connection *cp, VCL_STRING arg)
free(wsenv.ws_wordv[i]);
wsenv.ws_wordv[i] = p;
debug(cp->conf, 3, ("%d: %s",i,p));
}
}
- debug(cp->conf, 2, ("expanding query"));
+ debug(cp->conf, 2, ("expanding query {\"%s\"}", cp->conf->query));
ws.ws_env = (const char **)wsenv.ws_wordv;
- rc = wordsplit(cp->conf->query, &ws,
- WRDSF_NOCMD | WRDSF_QUOTE |
- WRDSF_NOSPLIT |
- WRDSF_ENV | WRDSF_UNDEF);
+ ws.ws_command = query_command_expand;
+ ws.ws_closure = cp;
+ wsflags = WRDSF_NOSPLIT | WRDSF_CLOSURE | WRDSF_ENV | WRDSF_UNDEF;
+
+ if (cp->conf->debug_level == 100) {
+ ws.ws_debug = dbrw_debug;
+ wsflags |= WRDSF_DEBUG | WRDSF_SHOWDBG;
+ }
+
+ rc = wordsplit(cp->conf->query, &ws, wsflags);
+
if (rc) {
dbrw_error("cannot expand query `%s': %s",
cp->conf->query, wordsplit_strerror(&ws));
wordsplit_free(&wsenv);
return NULL;
}
diff --git a/src/wordsplit.c b/src/wordsplit.c
index f4740bf..bad59b1 100644
--- a/src/wordsplit.c
+++ b/src/wordsplit.c
@@ -1,8 +1,8 @@
/* wordsplit - a word splitter
- Copyright (C) 2009-2014 Sergey Poznyakoff
+ Copyright (C) 2009-2018 Sergey Poznyakoff
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3 of the License, or (at your
option) any later version.
@@ -22,12 +22,14 @@
#include <ctype.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
+#include <pwd.h>
+#include <glob.h>
#if ENABLE_NLS
# include <gettext.h>
#else
# define gettext(msgid) msgid
#endif
@@ -45,23 +47,29 @@
#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c))
#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9')
#define ISXDIGIT(c) (strchr("abcdefABCDEF", c)!=NULL)
#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c))
#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127)
+/* True if C can begin (ISVARBEG) or continue (ISVARCHR) a variable name.
+   The argument is parenthesized so that an expression argument is
+   compared against '_' as a whole. */
+#define ISVARBEG(c) (ISALPHA(c) || (c) == '_')
+#define ISVARCHR(c) (ISALNUM(c) || (c) == '_')
+
+#define WSP_RETURN_DELIMS(wsp) \
+ ((wsp)->ws_flags & WRDSF_RETURN_DELIMS || ((wsp)->ws_options & WRDSO_MAXWORDS))
+
#define ALLOC_INIT 128
#define ALLOC_INCR 128
static void
_wsplt_alloc_die (struct wordsplit *wsp)
{
- wsp->ws_error (_("memory exhausted"));
+ wsp->ws_error ("%s", _("memory exhausted"));
abort ();
}
-static void
+static void
_wsplt_error (const char *fmt, ...)
{
va_list ap;
va_start (ap, fmt);
vfprintf (stderr, fmt, ap);
@@ -69,12 +77,21 @@ _wsplt_error (const char *fmt, ...)
fputc ('\n', stderr);
}
static void wordsplit_free_nodes (struct wordsplit *);
+/* Record the error code EC in WSP, print the diagnostics if
+   WRDSF_SHOWERR is set in ws_flags, and return EC. */
static int
+_wsplt_seterr (struct wordsplit *wsp, int ec)
+{
+  int show = wsp->ws_flags & WRDSF_SHOWERR;
+
+  wsp->ws_errno = ec;
+  if (show)
+    wordsplit_perror (wsp);
+  return ec;
+}
+}
+
+static int
_wsplt_nomem (struct wordsplit *wsp)
{
errno = ENOMEM;
wsp->ws_errno = WRDSE_NOSPACE;
if (wsp->ws_flags & WRDSF_ENOMEMABRT)
wsp->ws_alloc_die (wsp);
@@ -83,59 +100,137 @@ _wsplt_nomem (struct wordsplit *wsp)
if (!(wsp->ws_flags & WRDSF_REUSE))
wordsplit_free (wsp);
wordsplit_free_nodes (wsp);
return wsp->ws_errno;
}
+static int wordsplit_run (const char *command, size_t length,
+ struct wordsplit *wsp,
+ int flags, int lvl);
+
+static int wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
+ int flags);
+static int wordsplit_process_list (struct wordsplit *wsp, size_t start);
+static int wordsplit_finish (struct wordsplit *wsp);
+
+/*
+ * Split LEN bytes at STR using the nested splitter WSS, which takes its
+ * settings from the parent WSP.
+ *
+ * The delimiters and the debug, error and alloc_die callbacks are always
+ * copied from WSP.  The environment and ws_getvar are copied unless
+ * FLAGS has WRDSF_NOVAR; ws_command is copied unless FLAGS has
+ * WRDSF_NOCMD; the closure is copied unless both of these flags are set.
+ * WSS runs one nesting level deeper than WSP.
+ *
+ * If FINALIZE is non-zero, wordsplit_finish is called on WSS and its
+ * node list is freed; otherwise the node list is left in place.
+ *
+ * Returns 0 on success, or the code returned by the step that failed.
+ */
+static int
+_wsplt_subsplit (struct wordsplit *wsp, struct wordsplit *wss,
+ char const *str, int len,
+ int flags, int finalize)
+{
+ int rc;
+
+ wss->ws_delim = wsp->ws_delim;
+ wss->ws_debug = wsp->ws_debug;
+ wss->ws_error = wsp->ws_error;
+ wss->ws_alloc_die = wsp->ws_alloc_die;
+
+ /* Variable expansion enabled: share the parent's variable sources */
+ if (!(flags & WRDSF_NOVAR))
+ {
+ wss->ws_env = wsp->ws_env;
+ wss->ws_getvar = wsp->ws_getvar;
+ flags |= wsp->ws_flags & (WRDSF_ENV | WRDSF_ENV_KV | WRDSF_GETVAR);
+ }
+ /* Command expansion enabled: share the parent's command callback */
+ if (!(flags & WRDSF_NOCMD))
+ {
+ wss->ws_command = wsp->ws_command;
+ }
+
+ /* The closure is shared when at least one of the two expansions
+    is enabled */
+ if ((flags & (WRDSF_NOVAR|WRDSF_NOCMD)) != (WRDSF_NOVAR|WRDSF_NOCMD))
+ {
+ wss->ws_closure = wsp->ws_closure;
+ flags |= wsp->ws_flags & WRDSF_CLOSURE;
+ }
+
+ wss->ws_options = wsp->ws_options;
+
+ /* Force the flags that correspond to the members copied above
+    (wordsplit_init replaces ws_alloc_die and ws_error when their
+    flags are absent), and inherit the parent's diagnostic and
+    options flags. */
+ flags |= WRDSF_DELIM
+ | WRDSF_ALLOC_DIE
+ | WRDSF_ERROR
+ | WRDSF_DEBUG
+ | (wsp->ws_flags & (WRDSF_SHOWDBG | WRDSF_SHOWERR | WRDSF_OPTIONS));
+
+ rc = wordsplit_init (wss, str, len, flags);
+ if (rc)
+ return rc;
+ wss->ws_lvl = wsp->ws_lvl + 1;
+ rc = wordsplit_process_list (wss, 0);
+ if (rc)
+ {
+ wordsplit_free_nodes (wss);
+ return rc;
+ }
+ if (finalize)
+ {
+ rc = wordsplit_finish (wss);
+ wordsplit_free_nodes (wss);
+ }
+ return rc;
+}
+
+/* Copy the error state of the nested splitter WSS to its parent WSP.
+   If WSS holds a user error (WRDSE_USERERR), its message pointer is
+   handed over to WSP and cleared in WSS, so that only WSP refers
+   to it afterwards. */
+static void
+_wsplt_seterr_sub (struct wordsplit *wsp, struct wordsplit *wss)
+{
+  /* Release the user error message previously stored in WSP */
+  if (wsp->ws_errno == WRDSE_USERERR)
+    free (wsp->ws_usererr);
+
+  wsp->ws_errno = wss->ws_errno;
+  if (wss->ws_errno != WRDSE_USERERR)
+    return;
+
+  wsp->ws_usererr = wss->ws_usererr;
+  wss->ws_usererr = NULL;
+  wss->ws_errno = WRDSE_EOF;
+}
+
static void
wordsplit_init0 (struct wordsplit *wsp)
{
if (wsp->ws_flags & WRDSF_REUSE)
{
if (!(wsp->ws_flags & WRDSF_APPEND))
wordsplit_free_words (wsp);
+ wordsplit_clearerr (wsp);
}
else
{
wsp->ws_wordv = NULL;
wsp->ws_wordc = 0;
wsp->ws_wordn = 0;
}
wsp->ws_errno = 0;
- wsp->ws_head = wsp->ws_tail = NULL;
}
+char wordsplit_c_escape_tab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";
+
static int
wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
int flags)
{
wsp->ws_flags = flags;
if (!(wsp->ws_flags & WRDSF_ALLOC_DIE))
wsp->ws_alloc_die = _wsplt_alloc_die;
if (!(wsp->ws_flags & WRDSF_ERROR))
wsp->ws_error = _wsplt_error;
- if (!(wsp->ws_flags & WRDSF_NOVAR)
- && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR)))
+ if (!(wsp->ws_flags & WRDSF_NOVAR))
{
- errno = EINVAL;
- wsp->ws_errno = WRDSE_USAGE;
- if (wsp->ws_flags & WRDSF_SHOWERR)
- wordsplit_perror (wsp);
- return wsp->ws_errno;
+ /* These will be initialized on first variable assignment */
+ wsp->ws_envidx = wsp->ws_envsiz = 0;
+ wsp->ws_envbuf = NULL;
}
if (!(wsp->ws_flags & WRDSF_NOCMD))
{
- errno = EINVAL;
- wsp->ws_errno = WRDSE_NOSUPP;
- if (wsp->ws_flags & WRDSF_SHOWERR)
- wordsplit_perror (wsp);
- return wsp->ws_errno;
+ if (!wsp->ws_command)
+ {
+ _wsplt_seterr (wsp, WRDSE_USAGE);
+ errno = EINVAL;
+ return wsp->ws_errno;
+ }
}
if (wsp->ws_flags & WRDSF_SHOWDBG)
{
if (!(wsp->ws_flags & WRDSF_DEBUG))
{
@@ -160,16 +255,48 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
if (!(wsp->ws_flags & WRDSF_COMMENT))
wsp->ws_comment = NULL;
if (!(wsp->ws_flags & WRDSF_CLOSURE))
wsp->ws_closure = NULL;
+ if (!(wsp->ws_flags & WRDSF_OPTIONS))
+ wsp->ws_options = 0;
+
+ if (wsp->ws_flags & WRDSF_ESCAPE)
+ {
+ if (!wsp->ws_escape[WRDSX_WORD])
+ wsp->ws_escape[WRDSX_WORD] = "";
+ if (!wsp->ws_escape[WRDSX_QUOTE])
+ wsp->ws_escape[WRDSX_QUOTE] = "";
+ }
+ else
+ {
+ if (wsp->ws_flags & WRDSF_CESCAPES)
+ {
+ wsp->ws_escape[WRDSX_WORD] = wordsplit_c_escape_tab;
+ wsp->ws_escape[WRDSX_QUOTE] = wordsplit_c_escape_tab;
+ wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD
+ | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD;
+ }
+ else
+ {
+ wsp->ws_escape[WRDSX_WORD] = "";
+ wsp->ws_escape[WRDSX_QUOTE] = "\\\\\"\"";
+ wsp->ws_options |= WRDSO_BSKEEP_QUOTE;
+ }
+ }
+
wsp->ws_endp = 0;
+ wsp->ws_wordi = 0;
+ if (wsp->ws_flags & WRDSF_REUSE)
+ wordsplit_free_nodes (wsp);
+ wsp->ws_head = wsp->ws_tail = NULL;
+
wordsplit_init0 (wsp);
-
+
return 0;
}
static int
alloc_space (struct wordsplit *wsp, size_t count)
{
@@ -206,12 +333,13 @@ alloc_space (struct wordsplit *wsp, size_t count)
#define _WSNF_NULL 0x01 /* null node (a noop) */
#define _WSNF_WORD 0x02 /* node contains word in v.word */
#define _WSNF_QUOTE 0x04 /* text is quoted */
#define _WSNF_NOEXPAND 0x08 /* text is not subject to expansion */
#define _WSNF_JOIN 0x10 /* node must be joined with the next node */
#define _WSNF_SEXP 0x20 /* is a sed expression */
+#define _WSNF_DELIM 0x40 /* node is a delimiter */
#define _WSNF_EMPTYOK 0x0100 /* special flag indicating that
wordsplit_add_segm must add the
segment even if it is empty */
struct wordsplit_node
@@ -230,13 +358,13 @@ struct wordsplit_node
} v;
};
static const char *
wsnode_flagstr (int flags)
{
- static char retbuf[6];
+ static char retbuf[7];
char *p = retbuf;
if (flags & _WSNF_WORD)
*p++ = 'w';
else if (flags & _WSNF_NULL)
*p++ = 'n';
@@ -255,12 +383,16 @@ wsnode_flagstr (int flags)
else
*p++ = '-';
if (flags & _WSNF_SEXP)
*p++ = 's';
else
*p++ = '-';
+ if (flags & _WSNF_DELIM)
+ *p++ = 'd';
+ else
+ *p++ = '-';
*p = 0;
return retbuf;
}
static const char *
wsnode_ptr (struct wordsplit *wsp, struct wordsplit_node *p)
@@ -335,12 +467,20 @@ wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node)
else
wsp->ws_tail = node->prev;
node->next = node->prev = NULL;
}
+/* Return the last node of the list that starts at P, or NULL if P
+   is NULL. */
+static struct wordsplit_node *
+wsnode_tail (struct wordsplit_node *p)
+{
+  struct wordsplit_node *last;
+
+  if (!p)
+    return NULL;
+  for (last = p; last->next; last = last->next)
+    ;
+  return last;
+}
+
static void
wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node,
struct wordsplit_node *anchor, int before)
{
if (!wsp->ws_head)
{
@@ -350,28 +490,30 @@ wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node,
else if (before)
{
if (anchor->prev)
wsnode_insert (wsp, node, anchor->prev, 0);
else
{
+ struct wordsplit_node *tail = wsnode_tail (node);
node->prev = NULL;
- node->next = anchor;
- anchor->prev = node;
+ tail->next = anchor;
+ anchor->prev = tail;
wsp->ws_head = node;
}
}
else
{
struct wordsplit_node *p;
+ struct wordsplit_node *tail = wsnode_tail (node);
p = anchor->next;
if (p)
- p->prev = node;
+ p->prev = tail;
else
- wsp->ws_tail = node;
- node->next = p;
+ wsp->ws_tail = tail;
+ tail->next = p;
node->prev = anchor;
anchor->next = node;
}
}
static int
@@ -412,16 +554,18 @@ wordsplit_dump_nodes (struct wordsplit *wsp)
struct wordsplit_node *p;
int n = 0;
for (p = wsp->ws_head, n = 0; p; p = p->next, n++)
{
if (p->flags & _WSNF_WORD)
- wsp->ws_debug ("%4d: %p: %#04x (%s):%s;",
+ wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%s;",
+ wsp->ws_lvl,
n, p, p->flags, wsnode_flagstr (p->flags), p->v.word);
else
- wsp->ws_debug ("%4d: %p: %#04x (%s):%.*s;",
+ wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%.*s;",
+ wsp->ws_lvl,
n, p, p->flags, wsnode_flagstr (p->flags),
(int) (p->v.segm.end - p->v.segm.beg),
wsp->ws_input + p->v.segm.beg);
}
}
@@ -430,12 +574,15 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
{
struct wordsplit_node *p, *end;
size_t len = 0;
char *buf, *cur;
int stop;
+ if (!(node->flags & _WSNF_JOIN))
+ return 0;
+
for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next)
{
len += wsnode_len (p);
}
if (p)
len += wsnode_len (p);
@@ -454,12 +601,13 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
size_t slen = wsnode_len (p);
memcpy (cur, str, slen);
cur += slen;
if (p != node)
{
+ node->flags |= p->flags & _WSNF_QUOTE;
wsnode_remove (wsp, p);
stop = p == end;
wsnode_free (p);
}
p = next;
}
@@ -473,30 +621,29 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
else
node->flags |= _WSNF_WORD;
node->v.word = buf;
return 0;
}
+static void wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
+ char *dst, const char *src,
+ size_t n);
+
static int
wsnode_quoteremoval (struct wordsplit *wsp)
{
struct wordsplit_node *p;
- void (*uqfn) (char *, const char *, size_t) =
- (wsp->ws_flags & WRDSF_CESCAPES) ?
- wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy;
for (p = wsp->ws_head; p; p = p->next)
{
const char *str = wsnode_ptr (wsp, p);
size_t slen = wsnode_len (p);
int unquote;
if (wsp->ws_flags & WRDSF_QUOTE)
- {
- unquote = !(p->flags & _WSNF_NOEXPAND);
- }
+ unquote = !(p->flags & _WSNF_NOEXPAND);
else
unquote = 0;
if (unquote)
{
if (!(p->flags & _WSNF_WORD))
@@ -507,17 +654,14 @@ wsnode_quoteremoval (struct wordsplit *wsp)
memcpy (newstr, str, slen);
newstr[slen] = 0;
p->v.word = newstr;
p->flags |= _WSNF_WORD;
}
- if (wsp->ws_flags & WRDSF_ESCAPE)
- wordsplit_general_unquote_copy (p->v.word, str, slen,
- wsp->ws_escape);
- else
- uqfn (p->v.word, str, slen);
+ wordsplit_string_unquote_copy (wsp, p->flags & _WSNF_QUOTE,
+ p->v.word, str, slen);
}
}
return 0;
}
static int
@@ -532,48 +676,218 @@ wsnode_coalesce (struct wordsplit *wsp)
return 1;
}
return 0;
}
+/* Join P and all nodes that follow it into a single node.  Every node
+   from P up to, but not including, the last one is marked with
+   _WSNF_JOIN, and the run is then handed to coalesce_segment.
+   Nothing is done if P has no successor.
+   Returns 1 if coalesce_segment reports a failure, 0 otherwise. */
static int
+wsnode_tail_coalesce (struct wordsplit *wsp, struct wordsplit_node *p)
+{
+  struct wordsplit_node *np;
+
+  if (!p->next)
+    return 0;
+
+  for (np = p; np->next; np = np->next)
+    np->flags |= _WSNF_JOIN;
+
+  return coalesce_segment (wsp, p) ? 1 : 0;
+}
+
+static size_t skip_delim (struct wordsplit *wsp);
+
+static int
wordsplit_finish (struct wordsplit *wsp)
{
struct wordsplit_node *p;
size_t n;
+ int delim;
- n = 0;
+ /* Postprocess delimiters. It would be rather simple, if it weren't for
+ the incremental operation.
- for (p = wsp->ws_head; p; p = p->next)
- n++;
+ Nodes of type _WSNF_DELIM get inserted to the node list if either
+ WRDSF_RETURN_DELIMS flag or WRDSO_MAXWORDS option is set.
+
+ The following cases should be distinguished:
+
+ 1. If both WRDSF_SQUEEZE_DELIMS and WRDSF_RETURN_DELIMS are set, compress
+ any runs of similar delimiter nodes to a single node. The nodes are
+ 'similar' if they point to the same delimiter character.
+
+ If WRDSO_MAXWORDS option is set, stop compressing when
+ ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into
+ a single last node.
+
+ 2. If WRDSO_MAXWORDS option is set, but WRDSF_RETURN_DELIMS is not,
+ remove any delimiter nodes. Stop operation when
+ ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into
+ a single last node.
+
+ 3. If incremental operation is in progress, restart the loop any time
+ a delimiter node is about to be returned, unless WRDSF_RETURN_DELIMS
+ is set.
+ */
+ again:
+ delim = 0; /* Delimiter being processed (if any) */
+ n = 0; /* Number of words processed so far */
+ p = wsp->ws_head; /* Current node */
+
+ while (p)
+ {
+ struct wordsplit_node *next = p->next;
+ if (p->flags & _WSNF_DELIM)
+ {
+ if (wsp->ws_flags & WRDSF_RETURN_DELIMS)
+ {
+ if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS)
+ {
+ char const *s = wsnode_ptr (wsp, p);
+ if (delim)
+ {
+ if (delim == *s)
+ {
+ wsnode_remove (wsp, p);
+ p = next;
+ continue;
+ }
+ else
+ {
+ delim = 0;
+ n++; /* Count this node; it will be returned */
+ }
+ }
+ else
+ {
+ delim = *s;
+ p = next;
+ continue;
+ }
+ }
+ }
+ else if (wsp->ws_options & WRDSO_MAXWORDS)
+ {
+ wsnode_remove (wsp, p);
+ p = next;
+ continue;
+ }
+ }
+ else
+ {
+ if (delim)
+ {
+ /* Last node was a delimiter or a compressed run of delimiters;
+ Count it, and clear the delimiter marker */
+ n++;
+ delim = 0;
+ }
+ if (wsp->ws_options & WRDSO_MAXWORDS)
+ {
+ if (wsp->ws_wordi + n + 1 == wsp->ws_maxwords)
+ break;
+ }
+ }
+ n++;
+ if (wsp->ws_flags & WRDSF_INCREMENTAL)
+ p = NULL; /* Break the loop */
+ else
+ p = next;
+ }
+
+ if (p)
+ {
+ /* We're here if WRDSO_MAXWORDS is in effect and wsp->ws_maxwords
+ words have already been collected. Reconstruct a single final
+ node from the remaining nodes.