diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2019-07-24 13:19:20 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2019-07-24 13:36:25 +0300 |
commit | 7eaa3c45bedf204a274f5de2da4269d510d2bd56 (patch) | |
tree | 17977566b8a2ca6ed722ea75166cd0330ad30e81 | |
parent | 6a7581f2e60a600a4915e4f55b74a15c80701978 (diff) | |
download | wordsplit-7eaa3c45bedf204a274f5de2da4269d510d2bd56.tar.gz wordsplit-7eaa3c45bedf204a274f5de2da4269d510d2bd56.tar.bz2 |
Allow the caller to modify variable name constituents.
This new feature makes it possible to expand variables with
dots or other unusual characters in their names.
* README: Update.
* wordsplit.3: Document the use of the ws_namechar member.
* wordsplit.c (is_name_char): New static function.
(ISVARCHR): Removed. Use is_name_char instead.
(_wsplt_seterr): Set errno to EINVAL if WRDSE_USAGE is returned.
(_wsplt_subsplit): Clear the WRDSO_MAXWORDS option in the subsplit.
Pass ws_namechar.
(wordsplit_init): Check for valid ws_namechar content.
(expvar): Take into account ws_namechar when scanning variable name.
Fix name length passed to the _wsplt_setctxerr call.
* wordsplit.h (ws_namechar): New member.
(WRDSO_NAMECHAR): New option bit.
* wsp.c: New options: -D to define an "extra" environment entry
(possibly containing characters not allowed by the shell), and
-namechar to define additional variable name constituents.
* wordsplit.at: Test namechar modifications.
-rw-r--r-- | README | 4 | ||||
-rw-r--r-- | wordsplit.3 | 63 | ||||
-rw-r--r-- | wordsplit.at | 132 | ||||
-rw-r--r-- | wordsplit.c | 119 | ||||
-rw-r--r-- | wordsplit.h | 6 | ||||
-rw-r--r-- | wsp.c | 118 |
6 files changed, 362 insertions, 80 deletions
@@ -232,4 +232,4 @@ another project without incurring unnecessary overhead. -Currently the work is underway on incorporating it into existing -projects. +By the end of July 2019, all mentioned packages switched to using +wordsplit as a submodule. diff --git a/wordsplit.3 b/wordsplit.3 index e742030..337170f 100644 --- a/wordsplit.3 +++ b/wordsplit.3 @@ -16,3 +16,3 @@ .\" -.TH WORDSPLIT 3 "July 9, 2019" "WORDSPLIT" "Wordsplit User Reference" +.TH WORDSPLIT 3 "July 24, 2019" "WORDSPLIT" "Wordsplit User Reference" .SH NAME @@ -462,2 +462,39 @@ following a slash must be matched explicitly, unless the \fBWRDSO_DOTGLOB\fR option is set. +.SH VARIABLE NAMES +By default a shell-like lexical structure of a variable name is +assumed. A valid variable name begins with an alphabetical +character or underscore and contains alphabetical characters, digits +and underscores. +.PP +The set of characters that constitute a variable name can be +augmented. To do so, initialize the \fBws_namechar\fR member to the +C string containing the characters to be added, set the +\fBWRDSO_NAMECHAR\fR bit in \fBws_options\fR and set the +\fBWRDSF_OPTIONS\fR bit in the \fIflags\fR argument. +.PP +For example, to allow colon in variable names, do: +.PP +.EX +struct wordsplit ws; +ws.ws_namechar = ":"; +ws.ws_options = WRDSO_NAMECHAR; +wordsplit(str, &ws, WRDSF_DEFFLAGS|WRDSF_OPTIONS); +.EE +.PP +Certain characters cannot be allowed to be a name constituent. These +are: +.BR $ , +.BR { , +.BR } , +.BR * , +.BR @ , +.BR \- , +.BR + , +.BR ? , +and +.BR = . +If any of these appears in \fBws_namechar\fR, the \fBwordsplit\fR (and +\fBwordsplit_len\fR) function will return the +.B WRDSE_USAGE +error. .SH LIMITING THE NUMBER OF WORDS @@ -610,2 +647,14 @@ flag must be set if this member is initialized. .TP +.BI "const char *" ws_namechar +Lists characters that are allowed in a variable name, in addition to +alphanumerics and underscore. 
The +.B WRDSO_NAMECHAR +bit must be set in +.B ws_options +for this to take effect. +.sp +See the chapter +.BR "VARIABLE NAMES" , +for a detailed discussion. +.TP .BI "void (*" ws_alloc_die ") (wordsplit_t *)" @@ -986,2 +1035,14 @@ with index \fB\fIws_paramc\fR \- \fIN\fR, i.e. \fIN\fRth if counting from the end. +.TP +.B WRDSO_NAMECHAR +When set, indicates that the +.B ws_namechar +member of the +.B wordsplit_t +struct has been initialized. +.sp +This member allows you to modify the notion of what characters can be +part of a valid variable name. See the chapter +.BR "VARIABLE NAMES" , +for a detailed discussion. .SH "ERROR CODES" diff --git a/wordsplit.at b/wordsplit.at index d7d8bc9..aa2c87d 100644 --- a/wordsplit.at +++ b/wordsplit.at @@ -1023,2 +1023,134 @@ TOTAL: 1 +# Namechar modification tests + +TESTWSP([namechar modification],[], +[-namechar ".:" -Dx.y=one -Dx:foo=bar], +[one is $x.y, foo is $x:foo], +[NF: 6 +0: one +1: is +2: one, +3: foo +4: is +5: bar +TOTAL: 6 +]) + +AT_BANNER([namechar modification]) +TESTWSP([default value],[], +[-namechar ":."], +[${x:foo:-bar}], +[NF: 1 +0: bar +TOTAL: 1 +]) + +TESTWSP([default value (defined)],[], +[-namechar ":." -Dx:foo=qux], +[${x:foo:-bar}], +[NF: 1 +0: qux +TOTAL: 1 +], +[]) + +TESTWSP([default value (:- null)],[], +[-namechar ":." -Dx:foo=], +[${x:foo:-bar}], +[NF: 1 +0: bar +TOTAL: 1 +], +[]) + +TESTWSP([default value (- null)],[], +[-namechar ":." -Dx:foo=], +[${x:foo-bar}], +[NF: 0 +TOTAL: 0 +], +[]) + +TESTWSP([default value (- null, unset)],[], +[-namechar ":."], +[${x:foo-bar}], +[NF: 1 +0: bar +TOTAL: 1 +]) + +TESTWSP([assign default values],[], +[-namechar ":."], +[${x:foo=bar} +$x:foo], +[NF: 1 +0: bar +TOTAL: 1 +NF: 1 +0: bar +TOTAL: 1 +]) + +TESTWSP([default error message (var defined)],[], +[-namechar ":." 
-Dx:foo=bar], +[a ${x:foo:?} test], +[NF: 3 +0: a +1: bar +2: test +TOTAL: 3 +]) + +TESTWSP([default error message],[], +[-namechar ":."], +[${x:foo:?}], +[NF: 0 +TOTAL: 0 +], +[x:foo: variable null or not set +]) + +TESTWSP([custom error message (defined)],[wsp-custom-err wsp-custom-err03], +[-namechar ":." -Dx:foo=bar], +[a ${x:foo:?please define it} test], +[NF: 3 +0: a +1: bar +2: test +TOTAL: 3 +]) + +TESTWSP([custom error message],[wsp-custom-err wsp-custom-err04], +[-namechar ":."], +[a ${x:foo:?please define it} test], +[NF: 2 +0: a +1: test +TOTAL: 2 +], +[x:foo: please define it +]) + +TESTWSP([alternate value (defined)],[wsp-alt wsp-alt02], +[-namechar ":." -Dx:foo=bar], +[a ${x:foo:+isset} test], +[NF: 3 +0: a +1: isset +2: test +TOTAL: 3 +], +[], +[FOO=bar]) + +TESTWSP([alternate value],[wsp-alt wsp-alt03], +[-namechar ":."], +[a ${x:foo:+isset} test], +[NF: 2 +0: a +1: test +TOTAL: 2 +]) + + m4_popdef([TESTWSP]) diff --git a/wordsplit.c b/wordsplit.c index d3ec9e1..99a8b4f 100644 --- a/wordsplit.c +++ b/wordsplit.c @@ -54,3 +54,10 @@ #define ISVARBEG(c) (ISALPHA(c) || c == '_') -#define ISVARCHR(c) (ISALNUM(c) || c == '_') +static inline int +is_name_char (struct wordsplit *wsp, int c) +{ + return ISALNUM (c) + || c == '_' + || ((wsp->ws_options & WRDSO_NAMECHAR) + && strchr (wsp->ws_namechar, c)); +} @@ -94,2 +101,4 @@ _wsplt_seterr (struct wordsplit *wsp, int ec) wordsplit_perror (wsp); + if (ec == WRDSE_USAGE) + errno = EINVAL; return ec; @@ -174,4 +183,5 @@ _wsplt_subsplit (struct wordsplit *wsp, struct wordsplit *wss, - wss->ws_options = wsp->ws_options; - + wss->ws_options = wsp->ws_options & ~WRDSO_MAXWORDS; + wss->ws_namechar = wsp->ws_namechar; + flags |= WRDSF_DELIM @@ -262,7 +272,3 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, if (!wsp->ws_command) - { - _wsplt_seterr (wsp, WRDSE_USAGE); - errno = EINVAL; - return wsp->ws_errno; - } + return _wsplt_seterr (wsp, WRDSE_USAGE); } @@ -335,2 +341,10 @@ wordsplit_init 
(struct wordsplit *wsp, const char *input, size_t len, + if (wsp->ws_options & WRDSO_NAMECHAR) + { + if (wsp->ws_namechar[strcspn(wsp->ws_namechar, "${}*@-+?=")]) + return _wsplt_seterr (wsp, WRDSE_USAGE); + } + else + wsp->ws_namechar = NULL; + wsp->ws_endp = 0; @@ -1389,3 +1403,3 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, for (i = 1; i < len; i++) - if (!ISVARCHR (str[i])) + if (!is_name_char (wsp, str[i])) break; @@ -1431,17 +1445,8 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, { + int i0 = str[0] == '-' ? 1 : 0; str++; len--; - for (i = str[0] == '-' ? 1 : 0; i < len; i++) + for (i = i0; i < len; i++) { - if (str[i] == ':') - { - size_t j; - - defstr = str + i + 1; - if (find_closing_paren (str, i + 1, len, &j, "{}")) - return _wsplt_seterr (wsp, WRDSE_CBRACE); - *pend = str + j; - break; - } - else if (str[i] == '}') + if (str[i] == '}') { @@ -1458,2 +1463,4 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, return _wsplt_seterr (wsp, WRDSE_CBRACE); + if (i > i0 + 1 && str[i-1] == ':') + i--; *pend = str + j; @@ -1475,4 +1482,6 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, } - else if (!ISVARCHR (str[i])) + else if (!is_name_char (wsp, str[i])) { + if (str[i] == ':' && i + 1 < len && strchr ("-+?=", str[i+1])) + continue; return expvar_recover (wsp, str - 1, ptail, pend, flg); @@ -1497,6 +1506,14 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, - if (defstr && strchr("-+?=", defstr[0]) == 0) + if (is_param) { - rc = WRDSE_UNDEF; - defstr = NULL; + if (param_idx >= 0 && param_idx < wsp->ws_paramc) + { + value = strdup (wsp->ws_paramv[param_idx]); + if (!value) + rc = WRDSE_NOSPACE; + else + rc = WRDSE_OK; + } + else + rc = WRDSE_UNDEF; } @@ -1504,43 +1521,27 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, { - if (is_param) + if (wsp->ws_flags & WRDSF_GETVAR) { - if (param_idx >= 0 && param_idx < wsp->ws_paramc) + if (wsp->ws_options & WRDSO_GETVARPREF) { - value = 
strdup (wsp->ws_paramv[param_idx]); - if (!value) - rc = WRDSE_NOSPACE; - else - rc = WRDSE_OK; + rc = wsplt_env_getvar (wsp, str, i, &value); + if (rc == WRDSE_UNDEF) + rc = wsplt_env_lookup (wsp, str, i, &value); } else - rc = WRDSE_UNDEF; - } - else - { - if (wsp->ws_flags & WRDSF_GETVAR) { - if (wsp->ws_options & WRDSO_GETVARPREF) - { - rc = wsplt_env_getvar (wsp, str, i, &value); - if (rc == WRDSE_UNDEF) - rc = wsplt_env_lookup (wsp, str, i, &value); - } - else - { - rc = wsplt_env_lookup (wsp, str, i, &value); - if (rc == WRDSE_UNDEF) - rc = wsplt_env_getvar (wsp, str, i, &value); - } + rc = wsplt_env_lookup (wsp, str, i, &value); + if (rc == WRDSE_UNDEF) + rc = wsplt_env_getvar (wsp, str, i, &value); } - else - rc = wsplt_env_lookup (wsp, str, i, &value); } + else + rc = wsplt_env_lookup (wsp, str, i, &value); + } - if (rc == WRDSE_OK - && (!value || value[0] == 0) - && defstr && defstr[-1] == ':') - { - free (value); - rc = WRDSE_UNDEF; - } + if (rc == WRDSE_OK + && (!value || value[0] == 0) + && defstr && defstr[-1] == ':') + { + free (value); + rc = WRDSE_UNDEF; } @@ -1630,3 +1631,3 @@ expvar (struct wordsplit *wsp, const char *str, size_t len, { - _wsplt_setctxerr (wsp, WRDSE_UNDEF, str, *pend - str + 1); + _wsplt_setctxerr (wsp, WRDSE_UNDEF, str, i); return 1; diff --git a/wordsplit.h b/wordsplit.h index 3451979..7c14cea 100644 --- a/wordsplit.h +++ b/wordsplit.h @@ -54,2 +54,6 @@ struct wordsplit with backslash. */ + const char *ws_namechar; /* [Input] (WRDSO_NAMECHAR) Characters that can + be parts of a variable name, in addition to + alphanumerics and underscore. 
*/ + void (*ws_alloc_die) (wordsplit_t *wsp); @@ -249,2 +253,4 @@ struct wordsplit #define WRDSO_PARAM_NEGIDX 0x00008000 +/* ws_namechar member is initialized */ +#define WRDSO_NAMECHAR 0x00010000 @@ -43,3 +43,10 @@ enum env_type env_null, /* Null environment */ - env_sys /* Use system environment */ + env_sys, /* Use system environment */ + env_extra /* Use small built-in "extra" environment */ + }; + +enum + { + MAX_F_ENV = 16, + MAX_X_ENV = 16 }; @@ -56,5 +63,10 @@ struct wsclosure WRDSF_DOOFFS flag. */ - char **fenvbase; /* Environment for testing the ws_getenv function */ + char *fenvbase[MAX_F_ENV+1]; + /* Environment for testing the ws_getenv function */ int fenvidx; /* Number of variables in fenvbase */ - int fenvmax; /* Size of fenbase (entries) */ + + char *xenvbase[MAX_X_ENV+1]; + /* Extra environment variables */ + int xenvidx; /* Number of variables in xenvbase */ + int append_start; /* First argument to append (index in argv) */ @@ -112,3 +124,3 @@ getwsopt (int argc, char **argv, struct wsopt *wso, struct wsclosure *wsc) { - assert (wsc->fenvidx < wsc->fenvmax - 1); + assert (wsc->fenvidx < MAX_F_ENV); wsc->fenvbase[wsc->fenvidx++] = opt; @@ -120,2 +132,27 @@ getwsopt (int argc, char **argv, struct wsopt *wso, struct wsclosure *wsc) + if (strncmp (opt, "-D", 2) == 0) + { + char *asgn; + + if (opt[2]) + asgn = opt + 2; + else if (wsoptind == argc) + { + fprintf (stderr, "%s: missing arguments for -D\n", + progname); + exit (1); + } + else + asgn = argv[wsoptind++]; + + if (strchr (asgn, '=')) + { + assert (wsc->xenvidx < MAX_F_ENV); + wsc->xenvbase[wsc->xenvidx++] = asgn; + return 0; + } + wsoptind--; + return EOF; + } + if (strcmp (opt, "--version") == 0) @@ -308,2 +345,10 @@ setfn_maxwords (int flag, int neg, char *arg, struct wsclosure *wsc) static void +setfn_namechar (int flag, int neg, char *arg, struct wsclosure *wsc) +{ + wsc->wsflags |= WRDSF_OPTIONS; + wsc->ws.ws_options |= WRDSO_NAMECHAR; + wsc->ws.ws_namechar = arg; +} + +static void 
setfn_global (int flag, int neg, char *arg, struct wsclosure *wsc) @@ -325,2 +370,4 @@ setfn_env (int flag, int neg, char *arg, struct wsclosure *wsc) wsc->env_type = env_sys; + else if (strcmp (arg, "extra") == 0) + wsc->env_type = env_extra; else @@ -402,2 +449,3 @@ struct wsopt opttab[] = { { "maxwords", WRDSO_MAXWORDS, ws_required_argument, setfn_maxwords }, + { "namechar", WRDSO_NAMECHAR, ws_required_argument, setfn_namechar }, /* String options */ @@ -422,3 +470,3 @@ help (void) - printf ("usage: %s [options] [VAR=VALUE...] [-- EXTRA...]\n", progname); + printf ("usage: %s [options] [-D VAR=VALUE ...] [VAR=VALUE...] [-- EXTRA...]\n", progname); printf ("options are:\n"); @@ -482,3 +530,3 @@ print_qword (const char *word, int plaintext) static char ** -make_env_kv () +make_env_kv (char **origenv) { @@ -488,3 +536,3 @@ make_env_kv () /* Count the number of entries */ - for (i = 0; environ[i]; i++) + for (i = 0; origenv[i]; i++) ; @@ -495,11 +543,11 @@ make_env_kv () - for (i = j = 0; environ[i]; i++) + for (i = j = 0; origenv[i]; i++) { - size_t len = strcspn (environ[i], "="); + size_t len = strcspn (origenv[i], "="); char *p = malloc (len+1); assert (p != NULL); - memcpy (p, environ[i], len); + memcpy (p, origenv[i], len); p[len] = 0; newenv[j++] = p; - p = strdup (environ[i] + len + 1); + p = strdup (origenv[i] + len + 1); assert (p != NULL); @@ -628,3 +676,2 @@ main (int argc, char **argv) struct wsclosure wsc; - char *fenvbase[128]; char buf[1024], *ptr, *saved_ptr; @@ -636,5 +683,4 @@ main (int argc, char **argv) wsc.offarg = 0; - wsc.fenvbase = fenvbase; - wsc.fenvmax = sizeof (fenvbase) / sizeof (fenvbase[0]); wsc.fenvidx = 0; + wsc.xenvidx = 0; wsc.ws.ws_options = 0; @@ -649,8 +695,10 @@ main (int argc, char **argv) + wsc.fenvbase[wsc.fenvidx] = NULL; + wsc.xenvbase[wsc.xenvidx] = NULL; + if (wsc.fenvidx > 0) { - wsc.fenvbase[wsc.fenvidx] = NULL; wsc.wsflags |= WRDSF_GETVAR | WRDSF_CLOSURE; wsc.ws.ws_getvar = wsp_getvar; - wsc.ws.ws_closure = fenvbase; 
+ wsc.ws.ws_closure = wsc.fenvbase; } @@ -676,7 +724,41 @@ main (int argc, char **argv) case env_sys: + { + char **newenv; + + if (wsc.xenvidx) + { + size_t i, j; + for (i = 0; environ[i]; i++) + ; + newenv = calloc (i + wsc.xenvidx + 1, sizeof (*newenv)); + assert (newenv != NULL); + for (i = 0; environ[i]; i++) + { + newenv[i] = strdup (environ[i]); + assert (newenv[i] != NULL); + } + for (j = 0; j < wsc.xenvidx; j++, i++) + { + newenv[i] = strdup (wsc.xenvbase[j]); + assert (newenv[i] != NULL); + } + newenv[i] = NULL; + } + else + newenv = environ; + + wsc.wsflags |= WRDSF_ENV; + if (wsc.wsflags & WRDSF_ENV_KV) + wsc.ws.ws_env = (const char **) make_env_kv (newenv); + else + wsc.ws.ws_env = (const char **) newenv; + } + break; + + case env_extra: wsc.wsflags |= WRDSF_ENV; if (wsc.wsflags & WRDSF_ENV_KV) - wsc.ws.ws_env = (const char **) make_env_kv (); + wsc.ws.ws_env = (const char **) make_env_kv (wsc.xenvbase); else - wsc.ws.ws_env = (const char **) environ; + wsc.ws.ws_env = (const char **) wsc.xenvbase; break; |