diff options
-rw-r--r-- | NEWS | 31 | ||||
-rw-r--r-- | README | 3 | ||||
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | doc/vmod-dbrw.3 | 34 | ||||
-rw-r--r-- | doc/vmod-dbrw.texi | 16 | ||||
-rw-r--r-- | src/vmod_dbrw.c | 127 | ||||
-rw-r--r-- | src/wordsplit.c | 1646 | ||||
-rw-r--r-- | src/wordsplit.h | 214 | ||||
-rw-r--r-- | tests/initdb.at | 10 | ||||
-rw-r--r-- | tests/rewrite01.at | 5 | ||||
-rw-r--r-- | tests/rewrite02.at | 5 | ||||
-rw-r--r-- | tests/rewrite03.at | 5 | ||||
-rw-r--r-- | tests/rewrite04.at | 5 | ||||
-rw-r--r-- | tests/rewrite05.at | 5 | ||||
-rw-r--r-- | tests/rewrite06.at | 5 |
15 files changed, 1664 insertions, 449 deletions
@@ -1,2 +1,2 @@ -vmod-dbrw -- history of user-visible changes. 2018-01-30 +vmod-dbrw -- history of user-visible changes. 2018-12-08 See the end of file for copying conditions. @@ -5,3 +5,3 @@ Please send vmod-dbrw bug reports to <gray@gnu.org> -Version 2.2.90 (Git) +Version 2.2.91 (Git) @@ -20,2 +20,29 @@ e.g.: +* The $() functions in SQL templates + +The SQL templates support the use of $() constructs for invoking +built-in functions. So far one function is implemented: + + $(urlprefixes PATH) + +It expands to a comma-separated list of properly quoted pathname +prefixes, constructed from its argument. An optional query part is +stripped off the argument prior to expansion. For example + + $(urlprefixes "/local/user/local?a=1") + +expands to: + + '/local/user/local','/local/user','/local' + +This construct is intended for use in SQL IN conditionals, for +example: + + SELECT dest,pattern,value,flags + FROM rewrite + WHERE host='$host' + AND url IN ($(urlprefixes $url)) + ORDER BY length(dest),value,weight DESC + + @@ -1,3 +1,2 @@ Vmod-dbrw README -Copyright (C) 2013-2017 Sergey Poznyakoff See the end of file for copying conditions. @@ -225,3 +224,3 @@ Send bug reports and suggestions to <gray@gnu.org> -Copyright (C) 2013-2017 Sergey Poznyakoff +Copyright (C) 2013-2018 Sergey Poznyakoff diff --git a/configure.ac b/configure.ac index 7a1272c..1212a37 100644 --- a/configure.ac +++ b/configure.ac @@ -16,3 +16,3 @@ AC_PREREQ(2.69) -AC_INIT([vmod-dbrw], 2.2.90, [gray@gnu.org]) +AC_INIT([vmod-dbrw], 2.2.91, [gray@gnu.org]) AC_CONFIG_AUX_DIR([build-aux]) diff --git a/doc/vmod-dbrw.3 b/doc/vmod-dbrw.3 index 6f1dba8..4760b6b 100644 --- a/doc/vmod-dbrw.3 +++ b/doc/vmod-dbrw.3 @@ -1,3 +1,3 @@ .\" This file is part of Vmod-dbrw -*- nroff -*- -.\" Copyright (C) 2013-2017 Sergey Poznyakoff +.\" Copyright (C) 2013-2018 Sergey Poznyakoff .\" @@ -15,3 +15,3 @@ .\" along with vmod-dbrw. If not, see <http://www.gnu.org/licenses/>. 
-.TH VMOD-DBRW 1 "January 30, 2018" "VMOD-DBRW" "User Reference" +.TH VMOD-DBRW 1 "December 8, 2018" "VMOD-DBRW" "User Reference" .SH NAME @@ -136,2 +136,28 @@ of the name. .PP +The special construct +.sp +.EX +$(urlprefixes \fIPATH\fR) +.EE +.sp +expands to a comma-separated list of all possible path prefixes in +\fIPATH\fR. Each element in the list is quoted, so the result can +safely be used in SQL statements. For example, +.sp +.EX +$(urlprefixes "/local/user/login") +.EE +.sp +produces +.sp +.EX + '/local/user/login','/local/user','/local' +.EE +.PP +This construct is usually used in SQL \fBIN\fR conditionals, e.g. +.sp +.EX +SELECT * FROM table WHERE url IN ($(urlprefixes $url)) +.EE +.PP The expanded query is then sent to the database server. Handling @@ -250,3 +276,5 @@ sub vcl_recv { {"SELECT dest,pattern,value,flags FROM rewrite - WHERE host='$host' and '$url' like url"}); + WHERE host='$host' + AND url IN ($(urlprefixes $url)) + ORDER BY LENGTH(dest),value DESC"}); set req.http.X-Redirect-To = diff --git a/doc/vmod-dbrw.texi b/doc/vmod-dbrw.texi index 40fc84d..31b15d1 100644 --- a/doc/vmod-dbrw.texi +++ b/doc/vmod-dbrw.texi @@ -498,4 +498,4 @@ ORDER BY weight @noindent -Furthermore, the @code{url} column can contain a SQL wildcard pattern, -in which case the query will look like: +Furthermore, the @code{url} column can contain a path prefix, +which can be matched using the @code{IN} conditional: @@ -506,3 +506,3 @@ FROM rewrite WHERE host='$host' -AND '$url' like $url +AND url IN ($(urlprefixes $url)) ORDER BY weight @@ -511,2 +511,12 @@ ORDER BY weight +Notice the use of @samp{$(urlprefixes $url)}. This invokes the built-in +@dfn{function} @code{urlprefixes}, which expands to a comma-separated +list of properly quoted pathname prefixes, constructed from its +argument. 
For example, if @samp{$url} is @samp{/local/user/local?a=1}, +then the expansion of @samp{$(urlprefixes $url)} is: + +@example +'/local/user/local','/local/user','/local' +@end example + @node Rewrite diff --git a/src/vmod_dbrw.c b/src/vmod_dbrw.c index d6785d7..63d4ea1 100644 --- a/src/vmod_dbrw.c +++ b/src/vmod_dbrw.c @@ -191,5 +191,7 @@ parse_flags(const char *arg, int *qdisp, int *flags, char status[]) rc = 1; - } else + } else { strncpy(status, ws.ws_wordv[i] + 9, HTTP_STATUS_LEN); + status[HTTP_STATUS_LEN] = 0; + } } else if (strncmp(ws.ws_wordv[i], "R=", 2) == 0) { @@ -199,5 +201,7 @@ parse_flags(const char *arg, int *qdisp, int *flags, char status[]) rc = 1; - } else + } else { strncpy(status, ws.ws_wordv[i] + 2, HTTP_STATUS_LEN); + status[HTTP_STATUS_LEN] = 0; + } } else { @@ -505,2 +509,101 @@ findmatch(VRT_CTX, struct dbrw_connection *conn, char **param) +static int +expand_error(char **ret, char const *func, char const *msg) +{ + static char delim[] = ": "; + *ret = malloc(strlen(func) + strlen(delim) + strlen(msg) + 1); + if (*ret) { + strcat(strcat(strcpy(*ret, func), delim), msg); + return WRDSE_USERERR; + } else + return WRDSE_NOSPACE; +} + +static int +expand_urlprefixes(struct dbrw_connection *cp, char **argv, char **ret) +{ + char *arg; + size_t n, len, i, j; + char *q, *res; + + if (argv[1] == NULL || argv[2] != NULL) + return expand_error(ret, argv[0], "bad arguments"); + + /* Create a copy of the argument */ + if (cp->conf->backend->sql_escape) { + arg = sql_escape(cp, argv[1]); + } else { + arg = strdup(argv[1]); + } + if (!arg) + return WRDSE_NOSPACE; + + /* Cut off the query part, if any */ + i = j = strcspn(arg, "?"); + arg[i] = 0; + + /* Compute the resulting length */ + len = i; + n = 1; + for (; i > 0; i--) { + if (arg[i] == '/') { + len += i; + n++; + } + } + + /* Count quotes around each member */ + len += n * 2 + n - 1; + + /* Allocate the result */ + res = malloc(len + 1); + if (!res) { + free(arg); + return WRDSE_NOSPACE; + } + + /* Format the result */ + 
q = res; + i = j; + while (i) { + if (q > res) + *q++ = ','; + *q++ = '\''; + memcpy(q, arg, i); + q += i; + *q++ = '\''; + i--; + while (i > 0 && arg[i] != '/') + i--; + } + *q = 0; + *ret = res; + free(arg); + + return WRDSE_OK; +} + +static struct expcom { + char *com; + int (*exp) (struct dbrw_connection *, char **, char **); +} expcomtab[] = { + { "urlprefixes", expand_urlprefixes }, + { NULL } +}; + +static int +query_command_expand(char **ret, const char *cmd, size_t len, char **argv, + void *clos) +{ + struct expcom *ec; + static char diagmsg[] = "unknown command: "; + + for (ec = expcomtab; ec->com; ec++) { + if (strcmp(ec->com, argv[0]) == 0) + return ec->exp(clos, argv, ret); + } + + return expand_error(ret, argv[0], "unknown command"); +} + static char * @@ -511,3 +614,4 @@ do_rewrite(VRT_CTX, struct dbrw_connection *cp, VCL_STRING arg) char *res; - + int wsflags; + if (sql_connect(cp) || cp->state != state_connected) @@ -538,8 +642,15 @@ do_rewrite(VRT_CTX, struct dbrw_connection *cp, VCL_STRING arg) - debug(cp->conf, 2, ("expanding query")); + debug(cp->conf, 2, ("expanding query {\"%s\"}", cp->conf->query)); ws.ws_env = (const char **)wsenv.ws_wordv; - rc = wordsplit(cp->conf->query, &ws, - WRDSF_NOCMD | WRDSF_QUOTE | - WRDSF_NOSPLIT | - WRDSF_ENV | WRDSF_UNDEF); + ws.ws_command = query_command_expand; + ws.ws_closure = cp; + wsflags = WRDSF_NOSPLIT | WRDSF_CLOSURE | WRDSF_ENV | WRDSF_UNDEF; + + if (cp->conf->debug_level == 100) { + ws.ws_debug = dbrw_debug; + wsflags |= WRDSF_DEBUG | WRDSF_SHOWDBG; + } + + rc = wordsplit(cp->conf->query, &ws, wsflags); + if (rc) { diff --git a/src/wordsplit.c b/src/wordsplit.c index f4740bf..bad59b1 100644 --- a/src/wordsplit.c +++ b/src/wordsplit.c @@ -1,3 +1,3 @@ /* wordsplit - a word splitter - Copyright (C) 2009-2014 Sergey Poznyakoff + Copyright (C) 2009-2018 Sergey Poznyakoff @@ -27,2 +27,4 @@ #include <stdarg.h> +#include <pwd.h> +#include <glob.h> @@ -50,2 +52,8 @@ +#define ISVARBEG(c) (ISALPHA(c) || c == 
'_') +#define ISVARCHR(c) (ISALNUM(c) || c == '_') + +#define WSP_RETURN_DELIMS(wsp) \ + ((wsp)->ws_flags & WRDSF_RETURN_DELIMS || ((wsp)->ws_options & WRDSO_MAXWORDS)) + #define ALLOC_INIT 128 @@ -56,3 +64,3 @@ _wsplt_alloc_die (struct wordsplit *wsp) { - wsp->ws_error (_("memory exhausted")); + wsp->ws_error ("%s", _("memory exhausted")); abort (); @@ -60,3 +68,3 @@ _wsplt_alloc_die (struct wordsplit *wsp) -static void +static void _wsplt_error (const char *fmt, ...) @@ -74,2 +82,11 @@ static void wordsplit_free_nodes (struct wordsplit *); static int +_wsplt_seterr (struct wordsplit *wsp, int ec) +{ + wsp->ws_errno = ec; + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); + return ec; +} + +static int _wsplt_nomem (struct wordsplit *wsp) @@ -88,2 +105,80 @@ _wsplt_nomem (struct wordsplit *wsp) +static int wordsplit_run (const char *command, size_t length, + struct wordsplit *wsp, + int flags, int lvl); + +static int wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, + int flags); +static int wordsplit_process_list (struct wordsplit *wsp, size_t start); +static int wordsplit_finish (struct wordsplit *wsp); + +static int +_wsplt_subsplit (struct wordsplit *wsp, struct wordsplit *wss, + char const *str, int len, + int flags, int finalize) +{ + int rc; + + wss->ws_delim = wsp->ws_delim; + wss->ws_debug = wsp->ws_debug; + wss->ws_error = wsp->ws_error; + wss->ws_alloc_die = wsp->ws_alloc_die; + + if (!(flags & WRDSF_NOVAR)) + { + wss->ws_env = wsp->ws_env; + wss->ws_getvar = wsp->ws_getvar; + flags |= wsp->ws_flags & (WRDSF_ENV | WRDSF_ENV_KV | WRDSF_GETVAR); + } + if (!(flags & WRDSF_NOCMD)) + { + wss->ws_command = wsp->ws_command; + } + + if ((flags & (WRDSF_NOVAR|WRDSF_NOCMD)) != (WRDSF_NOVAR|WRDSF_NOCMD)) + { + wss->ws_closure = wsp->ws_closure; + flags |= wsp->ws_flags & WRDSF_CLOSURE; + } + + wss->ws_options = wsp->ws_options; + + flags |= WRDSF_DELIM + | WRDSF_ALLOC_DIE + | WRDSF_ERROR + | WRDSF_DEBUG + | (wsp->ws_flags & 
(WRDSF_SHOWDBG | WRDSF_SHOWERR | WRDSF_OPTIONS)); + + rc = wordsplit_init (wss, str, len, flags); + if (rc) + return rc; + wss->ws_lvl = wsp->ws_lvl + 1; + rc = wordsplit_process_list (wss, 0); + if (rc) + { + wordsplit_free_nodes (wss); + return rc; + } + if (finalize) + { + rc = wordsplit_finish (wss); + wordsplit_free_nodes (wss); + } + return rc; +} + +static void +_wsplt_seterr_sub (struct wordsplit *wsp, struct wordsplit *wss) +{ + if (wsp->ws_errno == WRDSE_USERERR) + free (wsp->ws_usererr); + wsp->ws_errno = wss->ws_errno; + if (wss->ws_errno == WRDSE_USERERR) + { + wsp->ws_usererr = wss->ws_usererr; + wss->ws_errno = WRDSE_EOF; + wss->ws_usererr = NULL; + } +} + static void @@ -95,2 +190,3 @@ wordsplit_init0 (struct wordsplit *wsp) wordsplit_free_words (wsp); + wordsplit_clearerr (wsp); } @@ -104,5 +200,6 @@ wordsplit_init0 (struct wordsplit *wsp) wsp->ws_errno = 0; - wsp->ws_head = wsp->ws_tail = NULL; } +char wordsplit_c_escape_tab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v"; + static int @@ -118,10 +215,7 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, - if (!(wsp->ws_flags & WRDSF_NOVAR) - && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR))) + if (!(wsp->ws_flags & WRDSF_NOVAR)) { - errno = EINVAL; - wsp->ws_errno = WRDSE_USAGE; - if (wsp->ws_flags & WRDSF_SHOWERR) - wordsplit_perror (wsp); - return wsp->ws_errno; + /* These will be initialized on first variable assignment */ + wsp->ws_envidx = wsp->ws_envsiz = 0; + wsp->ws_envbuf = NULL; } @@ -130,7 +224,8 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, { - errno = EINVAL; - wsp->ws_errno = WRDSE_NOSUPP; - if (wsp->ws_flags & WRDSF_SHOWERR) - wordsplit_perror (wsp); - return wsp->ws_errno; + if (!wsp->ws_command) + { + _wsplt_seterr (wsp, WRDSE_USAGE); + errno = EINVAL; + return wsp->ws_errno; + } } @@ -165,6 +260,38 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, + if (!(wsp->ws_flags & WRDSF_OPTIONS)) + wsp->ws_options = 0; + + if 
(wsp->ws_flags & WRDSF_ESCAPE) + { + if (!wsp->ws_escape[WRDSX_WORD]) + wsp->ws_escape[WRDSX_WORD] = ""; + if (!wsp->ws_escape[WRDSX_QUOTE]) + wsp->ws_escape[WRDSX_QUOTE] = ""; + } + else + { + if (wsp->ws_flags & WRDSF_CESCAPES) + { + wsp->ws_escape[WRDSX_WORD] = wordsplit_c_escape_tab; + wsp->ws_escape[WRDSX_QUOTE] = wordsplit_c_escape_tab; + wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD + | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD; + } + else + { + wsp->ws_escape[WRDSX_WORD] = ""; + wsp->ws_escape[WRDSX_QUOTE] = "\\\\\"\""; + wsp->ws_options |= WRDSO_BSKEEP_QUOTE; + } + } + wsp->ws_endp = 0; + wsp->ws_wordi = 0; + if (wsp->ws_flags & WRDSF_REUSE) + wordsplit_free_nodes (wsp); + wsp->ws_head = wsp->ws_tail = NULL; + wordsplit_init0 (wsp); - + return 0; @@ -211,2 +338,3 @@ alloc_space (struct wordsplit *wsp, size_t count) #define _WSNF_SEXP 0x20 /* is a sed expression */ +#define _WSNF_DELIM 0x40 /* node is a delimiter */ @@ -235,3 +363,3 @@ wsnode_flagstr (int flags) { - static char retbuf[6]; + static char retbuf[7]; char *p = retbuf; @@ -260,2 +388,6 @@ wsnode_flagstr (int flags) *p++ = '-'; + if (flags & _WSNF_DELIM) + *p++ = 'd'; + else + *p++ = '-'; *p = 0; @@ -340,2 +472,10 @@ wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node) +static struct wordsplit_node * +wsnode_tail (struct wordsplit_node *p) +{ + while (p && p->next) + p = p->next; + return p; +} + static void @@ -355,5 +495,6 @@ wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node, { + struct wordsplit_node *tail = wsnode_tail (node); node->prev = NULL; - node->next = anchor; - anchor->prev = node; + tail->next = anchor; + anchor->prev = tail; wsp->ws_head = node; @@ -364,2 +505,3 @@ wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node, struct wordsplit_node *p; + struct wordsplit_node *tail = wsnode_tail (node); @@ -367,6 +509,6 @@ wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node, if (p) - p->prev = node; + p->prev = tail; else - 
wsp->ws_tail = node; - node->next = p; + wsp->ws_tail = tail; + tail->next = p; node->prev = anchor; @@ -417,6 +559,8 @@ wordsplit_dump_nodes (struct wordsplit *wsp) if (p->flags & _WSNF_WORD) - wsp->ws_debug ("%4d: %p: %#04x (%s):%s;", + wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%s;", + wsp->ws_lvl, n, p, p->flags, wsnode_flagstr (p->flags), p->v.word); else - wsp->ws_debug ("%4d: %p: %#04x (%s):%.*s;", + wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%.*s;", + wsp->ws_lvl, n, p, p->flags, wsnode_flagstr (p->flags), @@ -435,2 +579,5 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node) + if (!(node->flags & _WSNF_JOIN)) + return 0; + for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next) @@ -459,2 +606,3 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node) { + node->flags |= p->flags & _WSNF_QUOTE; wsnode_remove (wsp, p); @@ -478,2 +626,6 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node) +static void wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote, + char *dst, const char *src, + size_t n); + static int @@ -482,5 +634,2 @@ wsnode_quoteremoval (struct wordsplit *wsp) struct wordsplit_node *p; - void (*uqfn) (char *, const char *, size_t) = - (wsp->ws_flags & WRDSF_CESCAPES) ? 
- wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy; @@ -493,5 +642,3 @@ wsnode_quoteremoval (struct wordsplit *wsp) if (wsp->ws_flags & WRDSF_QUOTE) - { - unquote = !(p->flags & _WSNF_NOEXPAND); - } + unquote = !(p->flags & _WSNF_NOEXPAND); else @@ -512,7 +659,4 @@ wsnode_quoteremoval (struct wordsplit *wsp) - if (wsp->ws_flags & WRDSF_ESCAPE) - wordsplit_general_unquote_copy (p->v.word, str, slen, - wsp->ws_escape); - else - uqfn (p->v.word, str, slen); + wordsplit_string_unquote_copy (wsp, p->flags & _WSNF_QUOTE, + p->v.word, str, slen); } @@ -537,2 +681,21 @@ wsnode_coalesce (struct wordsplit *wsp) static int +wsnode_tail_coalesce (struct wordsplit *wsp, struct wordsplit_node *p) +{ + if (p->next) + { + struct wordsplit_node *np = p; + while (np && np->next) + { + np->flags |= _WSNF_JOIN; + np = np->next; + } + if (coalesce_segment (wsp, p)) + return 1; + } + return 0; +} + +static size_t skip_delim (struct wordsplit *wsp); + +static int wordsplit_finish (struct wordsplit *wsp) @@ -541,15 +704,133 @@ wordsplit_finish (struct wordsplit *wsp) size_t n; + int delim; - n = 0; + /* Postprocess delimiters. It would be rather simple, if it weren't for + the incremental operation. - for (p = wsp->ws_head; p; p = p->next) - n++; + Nodes of type _WSNF_DELIM get inserted to the node list if either + WRDSF_RETURN_DELIMS flag or WRDSO_MAXWORDS option is set. + + The following cases should be distinguished: + + 1. If both WRDSF_SQUEEZE_DELIMS and WRDSF_RETURN_DELIMS are set, compress + any runs of similar delimiter nodes to a single node. The nodes are + 'similar' if they point to the same delimiter character. + + If WRDSO_MAXWORDS option is set, stop compressing when + ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into + a single last node. + + 2. If WRDSO_MAXWORDS option is set, but WRDSF_RETURN_DELIMS is not, + remove any delimiter nodes. Stop operation when + ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into + a single last node. + + 3. 
If incremental operation is in progress, restart the loop any time + a delimiter node is about to be returned, unless WRDSF_RETURN_DELIMS + is set. + */ + again: + delim = 0; /* Delimiter being processed (if any) */ + n = 0; /* Number of words processed so far */ + p = wsp->ws_head; /* Current node */ + + while (p) + { + struct wordsplit_node *next = p->next; + if (p->flags & _WSNF_DELIM) + { + if (wsp->ws_flags & WRDSF_RETURN_DELIMS) + { + if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS) + { + char const *s = wsnode_ptr (wsp, p); + if (delim) + { + if (delim == *s) + { + wsnode_remove (wsp, p); + p = next; + continue; + } + else + { + delim = 0; + n++; /* Count this node; it will be returned */ + } + } + else + { + delim = *s; + p = next; + continue; + } + } + } + else if (wsp->ws_options & WRDSO_MAXWORDS) + { + wsnode_remove (wsp, p); + p = next; + continue; + } + } + else + { + if (delim) + { + /* Last node was a delimiter or a compressed run of delimiters; + Count it, and clear the delimiter marker */ + n++; + delim = 0; + } + if (wsp->ws_options & WRDSO_MAXWORDS) + { + if (wsp->ws_wordi + n + 1 == wsp->ws_maxwords) + break; + } + } + n++; + if (wsp->ws_flags & WRDSF_INCREMENTAL) + p = NULL; /* Break the loop */ + else + p = next; + } + + if (p) + { + /* We're here if WRDSO_MAXWORDS is in effect and wsp->ws_maxwords + words have already been collected. Reconstruct a single final + node from the remaining nodes. */ + if (wsnode_tail_coalesce (wsp, p)) + return wsp->ws_errno; + n++; + } + + if (n == 0 && (wsp->ws_flags & WRDSF_INCREMENTAL)) + { + /* The loop above have eliminated all nodes. Restart the + processing, if there's any input left. 
*/ + if (wsp->ws_endp < wsp->ws_len) + { + int rc; + if (wsp->ws_flags & WRDSF_SHOWDBG) + wsp->ws_debug (_("Restarting")); + rc = wordsplit_process_list (wsp, skip_delim (wsp)); + if (rc) + return rc; + } + else + { + wsp->ws_errno = WRDSE_EOF; + return WRDSE_EOF; + } + goto again; + } if (alloc_space (wsp, n + 1)) - return 1; + return wsp->ws_errno; - for (p = wsp->ws_head; p; p = p->next) + while (wsp->ws_head) { - const char *str = wsnode_ptr (wsp, p); - size_t slen = wsnode_len (p); + const char *str = wsnode_ptr (wsp, wsp->ws_head); + size_t slen = wsnode_len (wsp->ws_head); char *newstr = malloc (slen + 1); @@ -565,4 +846,9 @@ wordsplit_finish (struct wordsplit *wsp) + wsnode_remove (wsp, wsp->ws_head); + wsp->ws_wordc++; + wsp->ws_wordi++; + if (wsp->ws_flags & WRDSF_INCREMENTAL) + break; } @@ -572,3 +858,31 @@ wordsplit_finish (struct wordsplit *wsp) +int +wordsplit_append (wordsplit_t *wsp, int argc, char **argv) +{ + int rc; + size_t i; + rc = alloc_space (wsp, wsp->ws_wordc + argc + 1); + if (rc) + return rc; + for (i = 0; i < argc; i++) + { + char *newstr = strdup (argv[i]); + if (!newstr) + { + while (i > 0) + { + free (wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i - 1]); + wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i - 1] = NULL; + i--; + } + return _wsplt_nomem (wsp); + } + wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i] = newstr; + } + wsp->ws_wordc += i; + wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL; + return 0; +} + /* Variable expansion */ @@ -609,6 +923,6 @@ node_split_prefix (struct wordsplit *wsp, static int -find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff) +find_closing_paren (const char *str, size_t i, size_t len, size_t *poff, + char const *paren) { - enum - { st_init, st_squote, st_dquote } state = st_init; + enum { st_init, st_squote, st_dquote } state = st_init; size_t level = 1; @@ -622,14 +936,19 @@ find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff) { - case '{': - level++; - 
break; - - case '}': - if (--level == 0) + default: + if (str[i] == paren[0]) + { + level++; + break; + } + else if (str[i] == paren[1]) { - *poff = i; - return 0; + if (--level == 0) + { + *poff = i; + return 0; + } + break; } break; - + case '"': @@ -660,4 +979,5 @@ find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff) -static const char * -wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len) +static int +wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len, + char const **ret) { @@ -666,3 +986,3 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len) if (!(wsp->ws_flags & WRDSF_ENV)) - return NULL; + return WRDSE_UNDEF; @@ -675,3 +995,6 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len) if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0) - return wsp->ws_env[i + 1]; + { + *ret = wsp->ws_env[i + 1]; + return WRDSE_OK; + } /* Skip the value. Break the loop if it is NULL. */ @@ -682,3 +1005,3 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len) } - else + else if (wsp->ws_env) { @@ -694,6 +1017,113 @@ wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len) if (j == len && var[j] == '=') - return var + j + 1; + { + *ret = var + j + 1; + return WRDSE_OK; + } } } - return NULL; + return WRDSE_UNDEF; +} + +static int +wsplt_assign_var (struct wordsplit *wsp, const char *name, size_t namelen, + char *value) +{ + int n = (wsp->ws_flags & WRDSF_ENV_KV) ? 
2 : 1; + char *v; + + if (wsp->ws_envidx + n >= wsp->ws_envsiz) + { + size_t sz; + char **newenv; + + if (!wsp->ws_envbuf) + { + if (wsp->ws_flags & WRDSF_ENV) + { + size_t i = 0, j; + + if (wsp->ws_env) + { + for (; wsp->ws_env[i]; i++) + ; + } + + sz = i + n + 1; + + newenv = calloc (sz, sizeof(newenv[0])); + if (!newenv) + return _wsplt_nomem (wsp); + + for (j = 0; j < i; j++) + { + newenv[j] = strdup (wsp->ws_env[j]); + if (!newenv[j]) + { + for (; j > 0; j--) + free (newenv[j-1]); + free (newenv); + return _wsplt_nomem (wsp); + } + } + newenv[j] = NULL; + + wsp->ws_envbuf = newenv; + wsp->ws_envidx = i; + wsp->ws_envsiz = sz; + wsp->ws_env = (const char**) wsp->ws_envbuf; + } + else + { + newenv = calloc (WORDSPLIT_ENV_INIT, sizeof(newenv[0])); + if (!newenv) + return _wsplt_nomem (wsp); + wsp->ws_envbuf = newenv; + wsp->ws_envidx = 0; + wsp->ws_envsiz = WORDSPLIT_ENV_INIT; + wsp->ws_env = (const char**) wsp->ws_envbuf; + wsp->ws_flags |= WRDSF_ENV; + } + } + else + { + wsp->ws_envsiz *= 2; + newenv = realloc (wsp->ws_envbuf, + wsp->ws_envsiz * sizeof (wsp->ws_envbuf[0])); + if (!newenv) + return _wsplt_nomem (wsp); + wsp->ws_envbuf = newenv; + wsp->ws_env = (const char**) wsp->ws_envbuf; + } + } + + if (wsp->ws_flags & WRDSF_ENV_KV) + { + /* A key-value pair environment */ + char *p = malloc (namelen + 1); + if (!p) + return _wsplt_nomem (wsp); + memcpy (p, name, namelen); + p[namelen] = 0; + + v = strdup (value); + if (!v) + { + free (p); + return _wsplt_nomem (wsp); + } + wsp->ws_env[wsp->ws_envidx++] = p; + wsp->ws_env[wsp->ws_envidx++] = v; + } + else + { + v = malloc (namelen + strlen(value) + 2); + if (!v) + return _wsplt_nomem (wsp); + memcpy (v, name, namelen); + v[namelen++] = '='; + strcpy(v + namelen, value); + wsp->ws_env[wsp->ws_envidx++] = v; + } + wsp->ws_env[wsp->ws_envidx] = NULL; + return WRDSE_OK; } @@ -706,3 +1136,3 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, const char *defstr = NULL; - const char *value; + char 
*value; const char *vptr; @@ -710,7 +1140,9 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, const char *start = str - 1; - - if (ISALPHA (str[0]) || str[0] == '_') + int rc; + struct wordsplit ws; + + if (ISVARBEG (str[0])) { for (i = 1; i < len; i++) - if (!(ISALNUM (str[i]) || str[i] == '_')) + if (!ISVARCHR (str[i])) break; @@ -723,26 +1155,32 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, for (i = 1; i < len; i++) - if (str[i] == '}' || str[i] == ':') - break; - if (str[i] == ':') { - size_t j; - - defstr = str + i + 1; - if (find_closing_cbrace (str, i + 1, len, &j)) + if (str[i] == ':') { - wsp->ws_errno = WRDSE_CBRACE; - return 1; + size_t j; + + defstr = str + i + 1; + if (find_closing_paren (str, i + 1, len, &j, "{}")) + return _wsplt_seterr (wsp, WRDSE_CBRACE); + *pend = str + j; + break; + } + else if (str[i] == '}') + { + defstr = NULL; + *pend = str + i; + break; + } + else if (strchr ("-+?=", str[i])) + { + size_t j; + + defstr = str + i; + if (find_closing_paren (str, i, len, &j, "{}")) + return _wsplt_seterr (wsp, WRDSE_CBRACE); + *pend = str + j; + break; } - *pend = str + j; - } - else if (str[i] == '}') - { - defstr = NULL; - *pend = str + i; - } - else - { - wsp->ws_errno = WRDSE_CBRACE; - return 1; } + if (i == len) + return _wsplt_seterr (wsp, WRDSE_CBRACE); } @@ -770,28 +1208,140 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, - vptr = wordsplit_find_env (wsp, str, i); - if (vptr) + if (defstr && strchr("-+?=", defstr[0]) == 0) { - value = strdup (vptr); - if (!value) - return _wsplt_nomem (wsp); + rc = WRDSE_UNDEF; + defstr = NULL; } - else if (wsp->ws_flags & WRDSF_GETVAR) - value = wsp->ws_getvar (str, i, wsp->ws_closure); - else if (wsp->ws_flags & WRDSF_UNDEF) + else { - wsp->ws_errno = WRDSE_UNDEF; - if (wsp->ws_flags & WRDSF_SHOWERR) - wordsplit_perror (wsp); - return 1; + rc = wordsplit_find_env (wsp, str, i, &vptr); + if (rc == WRDSE_OK) + { + if (vptr) + { + value = strdup (vptr); + if 
(!value) + rc = WRDSE_NOSPACE; + } + else + rc = WRDSE_UNDEF; + } + else if (wsp->ws_flags & WRDSF_GETVAR) + rc = wsp->ws_getvar (&value, str, i, wsp->ws_closure); + else |