aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org>2019-07-24 13:19:20 +0300
committerSergey Poznyakoff <gray@gnu.org>2019-07-24 13:36:25 +0300
commit7eaa3c45bedf204a274f5de2da4269d510d2bd56 (patch)
tree17977566b8a2ca6ed722ea75166cd0330ad30e81
parent6a7581f2e60a600a4915e4f55b74a15c80701978 (diff)
downloadwordsplit-7eaa3c45bedf204a274f5de2da4269d510d2bd56.tar.gz
wordsplit-7eaa3c45bedf204a274f5de2da4269d510d2bd56.tar.bz2
Allow the caller to modify variable name constituents.
This new feature makes it possible to expand variables with dots or other unusual characters in their names. * README: Update. * wordsplit.3: Document the use of the ws_namechar member. * wordsplit.c (is_name_char): New static function. (ISVARCHR): Removed. Use is_name_char instead. (_wsplt_seterr): Set errno to EINVAL if WRDSE_USAGE is returned. (_wsplt_subsplit): Clear the WRDSO_MAXWORDS option in the subsplit. Pass ws_namechar. (wordsplit_init): Check for valid ws_namechar content. (expvar): Take into account ws_namechar when scanning variable name. Fix name length passed to the _wsplt_setctxerr call. * wordsplit.h (ws_namechar): New member. (WRDSO_NAMECHAR): New option bit. * wsp.c: New options: -D to define an "extra" environment entry (possibly containing characters not allowed by the shell), and -namechar to define additional variable name constituents. * wordsplit.at: Test namechar modifications.
-rw-r--r--README4
-rw-r--r--wordsplit.363
-rw-r--r--wordsplit.at132
-rw-r--r--wordsplit.c119
-rw-r--r--wordsplit.h6
-rw-r--r--wsp.c118
6 files changed, 362 insertions, 80 deletions
diff --git a/README b/README
index 96ffbec..16f2c04 100644
--- a/README
+++ b/README
@@ -232,4 +232,4 @@ another project without incurring unnecessary overhead.
-Currently the work is underway on incorporating it into existing
-projects.
+By the end of July 2019, all mentioned packages switched to using
+wordsplit as a submodule.
diff --git a/wordsplit.3 b/wordsplit.3
index e742030..337170f 100644
--- a/wordsplit.3
+++ b/wordsplit.3
@@ -16,3 +16,3 @@
.\"
-.TH WORDSPLIT 3 "July 9, 2019" "WORDSPLIT" "Wordsplit User Reference"
+.TH WORDSPLIT 3 "July 24, 2019" "WORDSPLIT" "Wordsplit User Reference"
.SH NAME
@@ -462,2 +462,39 @@ following a slash must be matched explicitly, unless
the \fBWRDSO_DOTGLOB\fR option is set.
+.SH VARIABLE NAMES
+By default a shell-like lexical structure of a variable name is
+assumed. A valid variable name begins with an alphabetical
+character or underscore and contains alphabetical characters, digits
+and underscores.
+.PP
+The set of characters that constitute a variable name can be
+augmented. To do so, initialize the \fBws_namechar\fR member to the
+C string containing the characters to be added, set the
+\fBWRDSO_NAMECHAR\fR bit in \fBws_options\fR and set the
+\fBWRDSF_OPTIONS\fR bit in the \fIflags\fR argument.
+.PP
+For example, to allow colons in variable names, do:
+.PP
+.EX
+struct wordsplit ws;
+ws.ws_namechar = ":";
+ws.ws_options = WRDSO_NAMECHAR;
+wordsplit(str, &ws, WRDSF_DEFFLAGS|WRDSF_OPTIONS);
+.EE
+.PP
+Certain characters are not allowed to be name constituents. These
+are:
+.BR $ ,
+.BR { ,
+.BR } ,
+.BR * ,
+.BR @ ,
+.BR \- ,
+.BR + ,
+.BR ? ,
+and
+.BR = .
+If any of these appears in \fBws_namechar\fR, the \fBwordsplit\fR (and
+\fBwordsplit_len\fR) function will return the
+.B WRDSE_USAGE
+error.
.SH LIMITING THE NUMBER OF WORDS
@@ -610,2 +647,14 @@ flag must be set if this member is initialized.
.TP
+.BI "const char *" ws_namechar
+Lists characters that are allowed in a variable name, in addition to
+alphanumerics and underscore. The
+.B WRDSO_NAMECHAR
+bit must be set in
+.B ws_options
+for this to take effect.
+.sp
+See the chapter
+.BR "VARIABLE NAMES" ,
+for a detailed discussion.
+.TP
.BI "void (*" ws_alloc_die ") (wordsplit_t *)"
@@ -986,2 +1035,14 @@ with index \fB\fIws_paramc\fR \- \fIN\fR, i.e. \fIN\fRth if counting
from the end.
+.TP
+.B WRDSO_NAMECHAR
+When set, indicates that the
+.B ws_namechar
+member of the
+.B wordsplit_t
+struct has been initialized.
+.sp
+This member allows you to modify the notion of what characters can be
+part of a valid variable name. See the chapter
+.BR "VARIABLE NAMES" ,
+for a detailed discussion.
.SH "ERROR CODES"
diff --git a/wordsplit.at b/wordsplit.at
index d7d8bc9..aa2c87d 100644
--- a/wordsplit.at
+++ b/wordsplit.at
@@ -1023,2 +1023,134 @@ TOTAL: 1
+# Namechar modification tests
+
+TESTWSP([namechar modification],[],
+[-namechar ".:" -Dx.y=one -Dx:foo=bar],
+[one is $x.y, foo is $x:foo],
+[NF: 6
+0: one
+1: is
+2: one,
+3: foo
+4: is
+5: bar
+TOTAL: 6
+])
+
+AT_BANNER([namechar modification])
+TESTWSP([default value],[],
+[-namechar ":."],
+[${x:foo:-bar}],
+[NF: 1
+0: bar
+TOTAL: 1
+])
+
+TESTWSP([default value (defined)],[],
+[-namechar ":." -Dx:foo=qux],
+[${x:foo:-bar}],
+[NF: 1
+0: qux
+TOTAL: 1
+],
+[])
+
+TESTWSP([default value (:- null)],[],
+[-namechar ":." -Dx:foo=],
+[${x:foo:-bar}],
+[NF: 1
+0: bar
+TOTAL: 1
+],
+[])
+
+TESTWSP([default value (- null)],[],
+[-namechar ":." -Dx:foo=],
+[${x:foo-bar}],
+[NF: 0
+TOTAL: 0
+],
+[])
+
+TESTWSP([default value (- null, unset)],[],
+[-namechar ":."],
+[${x:foo-bar}],
+[NF: 1
+0: bar
+TOTAL: 1
+])
+
+TESTWSP([assign default values],[],
+[-namechar ":."],
+[${x:foo=bar}
+$x:foo],
+[NF: 1
+0: bar
+TOTAL: 1
+NF: 1
+0: bar
+TOTAL: 1
+])
+
+TESTWSP([default error message (var defined)],[],
+[-namechar ":." -Dx:foo=bar],
+[a ${x:foo:?} test],
+[NF: 3
+0: a
+1: bar
+2: test
+TOTAL: 3
+])
+
+TESTWSP([default error message],[],
+[-namechar ":."],
+[${x:foo:?}],
+[NF: 0
+TOTAL: 0
+],
+[x:foo: variable null or not set
+])
+
+TESTWSP([custom error message (defined)],[wsp-custom-err wsp-custom-err03],
+[-namechar ":." -Dx:foo=bar],
+[a ${x:foo:?please define it} test],
+[NF: 3
+0: a
+1: bar
+2: test
+TOTAL: 3
+])
+
+TESTWSP([custom error message],[wsp-custom-err wsp-custom-err04],
+[-namechar ":."],
+[a ${x:foo:?please define it} test],
+[NF: 2
+0: a
+1: test
+TOTAL: 2
+],
+[x:foo: please define it
+])
+
+TESTWSP([alternate value (defined)],[wsp-alt wsp-alt02],
+[-namechar ":." -Dx:foo=bar],
+[a ${x:foo:+isset} test],
+[NF: 3
+0: a
+1: isset
+2: test
+TOTAL: 3
+],
+[],
+[FOO=bar])
+
+TESTWSP([alternate value],[wsp-alt wsp-alt03],
+[-namechar ":."],
+[a ${x:foo:+isset} test],
+[NF: 2
+0: a
+1: test
+TOTAL: 2
+])
+
+
m4_popdef([TESTWSP])
diff --git a/wordsplit.c b/wordsplit.c
index d3ec9e1..99a8b4f 100644
--- a/wordsplit.c
+++ b/wordsplit.c
@@ -54,3 +54,10 @@
#define ISVARBEG(c) (ISALPHA(c) || c == '_')
-#define ISVARCHR(c) (ISALNUM(c) || c == '_')
+static inline int
+is_name_char (struct wordsplit *wsp, int c)
+{
+ return ISALNUM (c)
+ || c == '_'
+ || ((wsp->ws_options & WRDSO_NAMECHAR)
+ && strchr (wsp->ws_namechar, c));
+}
@@ -94,2 +101,4 @@ _wsplt_seterr (struct wordsplit *wsp, int ec)
wordsplit_perror (wsp);
+ if (ec == WRDSE_USAGE)
+ errno = EINVAL;
return ec;
@@ -174,4 +183,5 @@ _wsplt_subsplit (struct wordsplit *wsp, struct wordsplit *wss,
- wss->ws_options = wsp->ws_options;
-
+ wss->ws_options = wsp->ws_options & ~WRDSO_MAXWORDS;
+ wss->ws_namechar = wsp->ws_namechar;
+
flags |= WRDSF_DELIM
@@ -262,7 +272,3 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
if (!wsp->ws_command)
- {
- _wsplt_seterr (wsp, WRDSE_USAGE);
- errno = EINVAL;
- return wsp->ws_errno;
- }
+ return _wsplt_seterr (wsp, WRDSE_USAGE);
}
@@ -335,2 +341,10 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
+ if (wsp->ws_options & WRDSO_NAMECHAR)
+ {
+ if (wsp->ws_namechar[strcspn(wsp->ws_namechar, "${}*@-+?=")])
+ return _wsplt_seterr (wsp, WRDSE_USAGE);
+ }
+ else
+ wsp->ws_namechar = NULL;
+
wsp->ws_endp = 0;
@@ -1389,3 +1403,3 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
for (i = 1; i < len; i++)
- if (!ISVARCHR (str[i]))
+ if (!is_name_char (wsp, str[i]))
break;
@@ -1431,17 +1445,8 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
{
+ int i0 = str[0] == '-' ? 1 : 0;
str++;
len--;
- for (i = str[0] == '-' ? 1 : 0; i < len; i++)
+ for (i = i0; i < len; i++)
{
- if (str[i] == ':')
- {
- size_t j;
-
- defstr = str + i + 1;
- if (find_closing_paren (str, i + 1, len, &j, "{}"))
- return _wsplt_seterr (wsp, WRDSE_CBRACE);
- *pend = str + j;
- break;
- }
- else if (str[i] == '}')
+ if (str[i] == '}')
{
@@ -1458,2 +1463,4 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
return _wsplt_seterr (wsp, WRDSE_CBRACE);
+ if (i > i0 + 1 && str[i-1] == ':')
+ i--;
*pend = str + j;
@@ -1475,4 +1482,6 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
}
- else if (!ISVARCHR (str[i]))
+ else if (!is_name_char (wsp, str[i]))
{
+ if (str[i] == ':' && i + 1 < len && strchr ("-+?=", str[i+1]))
+ continue;
return expvar_recover (wsp, str - 1, ptail, pend, flg);
@@ -1497,6 +1506,14 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
- if (defstr && strchr("-+?=", defstr[0]) == 0)
+ if (is_param)
{
- rc = WRDSE_UNDEF;
- defstr = NULL;
+ if (param_idx >= 0 && param_idx < wsp->ws_paramc)
+ {
+ value = strdup (wsp->ws_paramv[param_idx]);
+ if (!value)
+ rc = WRDSE_NOSPACE;
+ else
+ rc = WRDSE_OK;
+ }
+ else
+ rc = WRDSE_UNDEF;
}
@@ -1504,43 +1521,27 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
{
- if (is_param)
+ if (wsp->ws_flags & WRDSF_GETVAR)
{
- if (param_idx >= 0 && param_idx < wsp->ws_paramc)
+ if (wsp->ws_options & WRDSO_GETVARPREF)
{
- value = strdup (wsp->ws_paramv[param_idx]);
- if (!value)
- rc = WRDSE_NOSPACE;
- else
- rc = WRDSE_OK;
+ rc = wsplt_env_getvar (wsp, str, i, &value);
+ if (rc == WRDSE_UNDEF)
+ rc = wsplt_env_lookup (wsp, str, i, &value);
}
else
- rc = WRDSE_UNDEF;
- }
- else
- {
- if (wsp->ws_flags & WRDSF_GETVAR)
{
- if (wsp->ws_options & WRDSO_GETVARPREF)
- {
- rc = wsplt_env_getvar (wsp, str, i, &value);
- if (rc == WRDSE_UNDEF)
- rc = wsplt_env_lookup (wsp, str, i, &value);
- }
- else
- {
- rc = wsplt_env_lookup (wsp, str, i, &value);
- if (rc == WRDSE_UNDEF)
- rc = wsplt_env_getvar (wsp, str, i, &value);
- }
+ rc = wsplt_env_lookup (wsp, str, i, &value);
+ if (rc == WRDSE_UNDEF)
+ rc = wsplt_env_getvar (wsp, str, i, &value);
}
- else
- rc = wsplt_env_lookup (wsp, str, i, &value);
}
+ else
+ rc = wsplt_env_lookup (wsp, str, i, &value);
+ }
- if (rc == WRDSE_OK
- && (!value || value[0] == 0)
- && defstr && defstr[-1] == ':')
- {
- free (value);
- rc = WRDSE_UNDEF;
- }
+ if (rc == WRDSE_OK
+ && (!value || value[0] == 0)
+ && defstr && defstr[-1] == ':')
+ {
+ free (value);
+ rc = WRDSE_UNDEF;
}
@@ -1630,3 +1631,3 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
{
- _wsplt_setctxerr (wsp, WRDSE_UNDEF, str, *pend - str + 1);
+ _wsplt_setctxerr (wsp, WRDSE_UNDEF, str, i);
return 1;
diff --git a/wordsplit.h b/wordsplit.h
index 3451979..7c14cea 100644
--- a/wordsplit.h
+++ b/wordsplit.h
@@ -54,2 +54,6 @@ struct wordsplit
with backslash. */
+ const char *ws_namechar; /* [Input] (WRDSO_NAMECHAR) Characters that can
+ be parts of a variable name, in addition to
+ alphanumerics and underscore. */
+
void (*ws_alloc_die) (wordsplit_t *wsp);
@@ -249,2 +253,4 @@ struct wordsplit
#define WRDSO_PARAM_NEGIDX 0x00008000
+/* ws_namechar member is initialized */
+#define WRDSO_NAMECHAR 0x00010000
diff --git a/wsp.c b/wsp.c
index cea7980..75fd6f5 100644
--- a/wsp.c
+++ b/wsp.c
@@ -43,3 +43,10 @@ enum env_type
env_null, /* Null environment */
- env_sys /* Use system environment */
+ env_sys, /* Use system environment */
+ env_extra /* Use small built-in "extra" environment */
+ };
+
+enum
+ {
+ MAX_F_ENV = 16,
+ MAX_X_ENV = 16
};
@@ -56,5 +63,10 @@ struct wsclosure
WRDSF_DOOFFS flag. */
- char **fenvbase; /* Environment for testing the ws_getenv function */
+ char *fenvbase[MAX_F_ENV+1];
+ /* Environment for testing the ws_getenv function */
int fenvidx; /* Number of variables in fenvbase */
- int fenvmax; /* Size of fenbase (entries) */
+
+ char *xenvbase[MAX_X_ENV+1];
+ /* Extra environment variables */
+ int xenvidx; /* Number of variables in xenvbase */
+
int append_start; /* First argument to append (index in argv) */
@@ -112,3 +124,3 @@ getwsopt (int argc, char **argv, struct wsopt *wso, struct wsclosure *wsc)
{
- assert (wsc->fenvidx < wsc->fenvmax - 1);
+ assert (wsc->fenvidx < MAX_F_ENV);
wsc->fenvbase[wsc->fenvidx++] = opt;
@@ -120,2 +132,27 @@ getwsopt (int argc, char **argv, struct wsopt *wso, struct wsclosure *wsc)
+ if (strncmp (opt, "-D", 2) == 0)
+ {
+ char *asgn;
+
+ if (opt[2])
+ asgn = opt + 2;
+ else if (wsoptind == argc)
+ {
+ fprintf (stderr, "%s: missing arguments for -D\n",
+ progname);
+ exit (1);
+ }
+ else
+ asgn = argv[wsoptind++];
+
+ if (strchr (asgn, '='))
+ {
+ assert (wsc->xenvidx < MAX_F_ENV);
+ wsc->xenvbase[wsc->xenvidx++] = asgn;
+ return 0;
+ }
+ wsoptind--;
+ return EOF;
+ }
+
if (strcmp (opt, "--version") == 0)
@@ -308,2 +345,10 @@ setfn_maxwords (int flag, int neg, char *arg, struct wsclosure *wsc)
static void
+setfn_namechar (int flag, int neg, char *arg, struct wsclosure *wsc)
+{
+ wsc->wsflags |= WRDSF_OPTIONS;
+ wsc->ws.ws_options |= WRDSO_NAMECHAR;
+ wsc->ws.ws_namechar = arg;
+}
+
+static void
setfn_global (int flag, int neg, char *arg, struct wsclosure *wsc)
@@ -325,2 +370,4 @@ setfn_env (int flag, int neg, char *arg, struct wsclosure *wsc)
wsc->env_type = env_sys;
+ else if (strcmp (arg, "extra") == 0)
+ wsc->env_type = env_extra;
else
@@ -402,2 +449,3 @@ struct wsopt opttab[] = {
{ "maxwords", WRDSO_MAXWORDS, ws_required_argument, setfn_maxwords },
+ { "namechar", WRDSO_NAMECHAR, ws_required_argument, setfn_namechar },
/* String options */
@@ -422,3 +470,3 @@ help (void)
- printf ("usage: %s [options] [VAR=VALUE...] [-- EXTRA...]\n", progname);
+ printf ("usage: %s [options] [-D VAR=VALUE ...] [VAR=VALUE...] [-- EXTRA...]\n", progname);
printf ("options are:\n");
@@ -482,3 +530,3 @@ print_qword (const char *word, int plaintext)
static char **
-make_env_kv ()
+make_env_kv (char **origenv)
{
@@ -488,3 +536,3 @@ make_env_kv ()
/* Count the number of entries */
- for (i = 0; environ[i]; i++)
+ for (i = 0; origenv[i]; i++)
;
@@ -495,11 +543,11 @@ make_env_kv ()
- for (i = j = 0; environ[i]; i++)
+ for (i = j = 0; origenv[i]; i++)
{
- size_t len = strcspn (environ[i], "=");
+ size_t len = strcspn (origenv[i], "=");
char *p = malloc (len+1);
assert (p != NULL);
- memcpy (p, environ[i], len);
+ memcpy (p, origenv[i], len);
p[len] = 0;
newenv[j++] = p;
- p = strdup (environ[i] + len + 1);
+ p = strdup (origenv[i] + len + 1);
assert (p != NULL);
@@ -628,3 +676,2 @@ main (int argc, char **argv)
struct wsclosure wsc;
- char *fenvbase[128];
char buf[1024], *ptr, *saved_ptr;
@@ -636,5 +683,4 @@ main (int argc, char **argv)
wsc.offarg = 0;
- wsc.fenvbase = fenvbase;
- wsc.fenvmax = sizeof (fenvbase) / sizeof (fenvbase[0]);
wsc.fenvidx = 0;
+ wsc.xenvidx = 0;
wsc.ws.ws_options = 0;
@@ -649,8 +695,10 @@ main (int argc, char **argv)
+ wsc.fenvbase[wsc.fenvidx] = NULL;
+ wsc.xenvbase[wsc.xenvidx] = NULL;
+
if (wsc.fenvidx > 0)
{
- wsc.fenvbase[wsc.fenvidx] = NULL;
wsc.wsflags |= WRDSF_GETVAR | WRDSF_CLOSURE;
wsc.ws.ws_getvar = wsp_getvar;
- wsc.ws.ws_closure = fenvbase;
+ wsc.ws.ws_closure = wsc.fenvbase;
}
@@ -676,7 +724,41 @@ main (int argc, char **argv)
case env_sys:
+ {
+ char **newenv;
+
+ if (wsc.xenvidx)
+ {
+ size_t i, j;
+ for (i = 0; environ[i]; i++)
+ ;
+ newenv = calloc (i + wsc.xenvidx + 1, sizeof (*newenv));
+ assert (newenv != NULL);
+ for (i = 0; environ[i]; i++)
+ {
+ newenv[i] = strdup (environ[i]);
+ assert (newenv[i] != NULL);
+ }
+ for (j = 0; j < wsc.xenvidx; j++, i++)
+ {
+ newenv[i] = strdup (wsc.xenvbase[j]);
+ assert (newenv[i] != NULL);
+ }
+ newenv[i] = NULL;
+ }
+ else
+ newenv = environ;
+
+ wsc.wsflags |= WRDSF_ENV;
+ if (wsc.wsflags & WRDSF_ENV_KV)
+ wsc.ws.ws_env = (const char **) make_env_kv (newenv);
+ else
+ wsc.ws.ws_env = (const char **) newenv;
+ }
+ break;
+
+ case env_extra:
wsc.wsflags |= WRDSF_ENV;
if (wsc.wsflags & WRDSF_ENV_KV)
- wsc.ws.ws_env = (const char **) make_env_kv ();
+ wsc.ws.ws_env = (const char **) make_env_kv (wsc.xenvbase);
else
- wsc.ws.ws_env = (const char **) environ;
+ wsc.ws.ws_env = (const char **) wsc.xenvbase;
break;

Return to:

Send suggestions and report system problems to the System administrator.