aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sergey Poznyakoff <gray@gnu.org> 2019-07-24 13:19:20 +0300
committer Sergey Poznyakoff <gray@gnu.org> 2019-07-24 13:36:25 +0300
commit 7eaa3c45bedf204a274f5de2da4269d510d2bd56 (patch)
tree 17977566b8a2ca6ed722ea75166cd0330ad30e81
parent 6a7581f2e60a600a4915e4f55b74a15c80701978 (diff)
download wordsplit-7eaa3c45bedf204a274f5de2da4269d510d2bd56.tar.gz
wordsplit-7eaa3c45bedf204a274f5de2da4269d510d2bd56.tar.bz2
Allow the caller to modify variable name constituents.
This new feature makes it possible to expand variables with dots or other unusual characters in their names.

* README: Update.
* wordsplit.3: Document the use of the ws_namechar member.
* wordsplit.c (is_name_char): New static function.
(ISVARCHR): Removed. Use is_name_char instead.
(_wsplt_seterr): Set errno to EINVAL if WRDSE_USAGE is returned.
(_wsplt_subsplit): Clear the WRDSO_MAXWORDS option in the subsplit. Pass ws_namechar.
(wordsplit_init): Check for valid ws_namechar content.
(expvar): Take into account ws_namechar when scanning variable name. Fix name length passed to the _wsplt_setctxerr call.
* wordsplit.h (ws_namechar): New member.
(WRDSO_NAMECHAR): New option bit.
* wsp.c: New options: -D to define an "extra" environment entry (possibly containing characters not allowed by the shell), and -namechar to define additional variable name constituents.
* wordsplit.at: Test namechar modifications.
-rw-r--r-- README 4
-rw-r--r-- wordsplit.3 63
-rw-r--r-- wordsplit.at 132
-rw-r--r-- wordsplit.c 61
-rw-r--r-- wordsplit.h 6
-rw-r--r-- wsp.c 118
6 files changed, 333 insertions, 51 deletions
diff --git a/README b/README
index 96ffbec..16f2c04 100644
--- a/README
+++ b/README
@@ -230,8 +230,8 @@ the suite. It was therefore decided that it would be advisable to
have wordsplit as a separate package which could be easily included in
another project without incurring unnecessary overhead.
-Currently the work is underway on incorporating it into existing
-projects.
+By the end of July 2019, all mentioned packages switched to using
+wordsplit as a submodule.
* References
diff --git a/wordsplit.3 b/wordsplit.3
index e742030..337170f 100644
--- a/wordsplit.3
+++ b/wordsplit.3
@@ -14,7 +14,7 @@
.\" You should have received a copy of the GNU General Public License
.\" along with wordsplit. If not, see <http://www.gnu.org/licenses/>.
.\"
-.TH WORDSPLIT 3 "July 9, 2019" "WORDSPLIT" "Wordsplit User Reference"
+.TH WORDSPLIT 3 "July 24, 2019" "WORDSPLIT" "Wordsplit User Reference"
.SH NAME
wordsplit \- split string into words
.SH SYNOPSIS
@@ -460,6 +460,43 @@ for each such word using
When matching a pattern, the dot at the start of a name or immediately
following a slash must be matched explicitly, unless
the \fBWRDSO_DOTGLOB\fR option is set.
+.SH VARIABLE NAMES
+By default a shell-like lexical structure of a variable name is
+assumed. A valid variable name begins with an alphabetical
+character or underscore and contains alphabetical characters, digits
+and underscores.
+.PP
+The set of characters that constitute a variable name can be
+augmented. To do so, initialize the \fBws_namechar\fR member to the
+C string containing the characters to be added, set the
+\fBWRDSO_NAMECHAR\fR bit in \fBws_options\fR and set the
+\fBWRDSF_OPTIONS\fR bit in the \fIflags\fR argument.
+.PP
+For example, to allow colons in variable names, do:
+.PP
+.EX
+struct wordsplit ws;
+ws.ws_namechar = ":";
+ws.ws_options = WRDSO_NAMECHAR;
+wordsplit(str, &ws, WRDSF_DEFFLAGS|WRDSF_OPTIONS);
+.EE
+.PP
+Certain characters cannot be allowed to be a name constituent. These
+are:
+.BR $ ,
+.BR { ,
+.BR } ,
+.BR * ,
+.BR @ ,
+.BR \- ,
+.BR + ,
+.BR ? ,
+and
+.BR = .
+If any of these appears in \fBws_namechar\fR, the \fBwordsplit\fR (and
+\fBwordsplit_len\fR) function will return the
+.B WRDSE_USAGE
+error.
.SH LIMITING THE NUMBER OF WORDS
The maximum number of words to be returned can be limited by setting
the \fBws_maxwords\fR member to the desired count, and setting the
@@ -608,6 +645,18 @@ tabulation character and \fB\\n\fR into newline.
.B WRDSF_ESCAPE
flag must be set if this member is initialized.
.TP
+.BI "const char *" ws_namechar
+Lists characters that are allowed in a variable name, in addition to
+alphanumerics and underscore. The
+.B WRDSO_NAMECHAR
+bit must be set in
+.B ws_options
+for this to take effect.
+.sp
+See the chapter
+.BR "VARIABLE NAMES" ,
+for a detailed discussion.
+.TP
.BI "void (*" ws_alloc_die ") (wordsplit_t *)"
This function is called when
.B wordsplit
@@ -984,6 +1033,18 @@ positional argument references. A negative argument reference has the
form \fB${-\fIN\fB}\fR. It is expanded to the value of the argument
with index \fB\fIws_paramc\fR \- \fIN\fR, i.e. \fIN\fRth if counting
from the end.
+.TP
+.B WRDSO_NAMECHAR
+When set, indicates that the
+.B ws_namechar
+member of the
+.B wordsplit_t
+struct has been initialized.
+.sp
+This member allows you to modify the notion of what characters can be
+part of a valid variable name. See the chapter
+.BR "VARIABLE NAMES" ,
+for a detailed discussion.
.SH "ERROR CODES"
.TP
.BR WRDSE_OK ", " WRDSE_EOF
diff --git a/wordsplit.at b/wordsplit.at
index d7d8bc9..aa2c87d 100644
--- a/wordsplit.at
+++ b/wordsplit.at
@@ -1021,6 +1021,138 @@ NF: 1
TOTAL: 1
])
+# Namechar modification tests
+
+TESTWSP([namechar modification],[],
+[-namechar ".:" -Dx.y=one -Dx:foo=bar],
+[one is $x.y, foo is $x:foo],
+[NF: 6
+0: one
+1: is
+2: one,
+3: foo
+4: is
+5: bar
+TOTAL: 6
+])
+
+AT_BANNER([namechar modification])
+TESTWSP([default value],[],
+[-namechar ":."],
+[${x:foo:-bar}],
+[NF: 1
+0: bar
+TOTAL: 1
+])
+
+TESTWSP([default value (defined)],[],
+[-namechar ":." -Dx:foo=qux],
+[${x:foo:-bar}],
+[NF: 1
+0: qux
+TOTAL: 1
+],
+[])
+
+TESTWSP([default value (:- null)],[],
+[-namechar ":." -Dx:foo=],
+[${x:foo:-bar}],
+[NF: 1
+0: bar
+TOTAL: 1
+],
+[])
+
+TESTWSP([default value (- null)],[],
+[-namechar ":." -Dx:foo=],
+[${x:foo-bar}],
+[NF: 0
+TOTAL: 0
+],
+[])
+
+TESTWSP([default value (- null, unset)],[],
+[-namechar ":."],
+[${x:foo-bar}],
+[NF: 1
+0: bar
+TOTAL: 1
+])
+
+TESTWSP([assign default values],[],
+[-namechar ":."],
+[${x:foo=bar}
+$x:foo],
+[NF: 1
+0: bar
+TOTAL: 1
+NF: 1
+0: bar
+TOTAL: 1
+])
+
+TESTWSP([default error message (var defined)],[],
+[-namechar ":." -Dx:foo=bar],
+[a ${x:foo:?} test],
+[NF: 3
+0: a
+1: bar
+2: test
+TOTAL: 3
+])
+
+TESTWSP([default error message],[],
+[-namechar ":."],
+[${x:foo:?}],
+[NF: 0
+TOTAL: 0
+],
+[x:foo: variable null or not set
+])
+
+TESTWSP([custom error message (defined)],[wsp-custom-err wsp-custom-err03],
+[-namechar ":." -Dx:foo=bar],
+[a ${x:foo:?please define it} test],
+[NF: 3
+0: a
+1: bar
+2: test
+TOTAL: 3
+])
+
+TESTWSP([custom error message],[wsp-custom-err wsp-custom-err04],
+[-namechar ":."],
+[a ${x:foo:?please define it} test],
+[NF: 2
+0: a
+1: test
+TOTAL: 2
+],
+[x:foo: please define it
+])
+
+TESTWSP([alternate value (defined)],[wsp-alt wsp-alt02],
+[-namechar ":." -Dx:foo=bar],
+[a ${x:foo:+isset} test],
+[NF: 3
+0: a
+1: isset
+2: test
+TOTAL: 3
+],
+[],
+[FOO=bar])
+
+TESTWSP([alternate value],[wsp-alt wsp-alt03],
+[-namechar ":."],
+[a ${x:foo:+isset} test],
+[NF: 2
+0: a
+1: test
+TOTAL: 2
+])
+
+
m4_popdef([TESTWSP])
m4_popdef([wspnum])
m4_popdef([wspid])
diff --git a/wordsplit.c b/wordsplit.c
index d3ec9e1..99a8b4f 100644
--- a/wordsplit.c
+++ b/wordsplit.c
@@ -52,7 +52,14 @@
#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127)
#define ISVARBEG(c) (ISALPHA(c) || c == '_')
-#define ISVARCHR(c) (ISALNUM(c) || c == '_')
+static inline int
+is_name_char (struct wordsplit *wsp, int c)
+{
+ return ISALNUM (c)
+ || c == '_'
+ || ((wsp->ws_options & WRDSO_NAMECHAR)
+ && strchr (wsp->ws_namechar, c));
+}
#define WSP_RETURN_DELIMS(wsp) \
((wsp)->ws_flags & WRDSF_RETURN_DELIMS || ((wsp)->ws_options & WRDSO_MAXWORDS))
@@ -92,6 +99,8 @@ _wsplt_seterr (struct wordsplit *wsp, int ec)
wsp->ws_errno = ec;
if (wsp->ws_flags & WRDSF_SHOWERR)
wordsplit_perror (wsp);
+ if (ec == WRDSE_USAGE)
+ errno = EINVAL;
return ec;
}
@@ -172,7 +181,8 @@ _wsplt_subsplit (struct wordsplit *wsp, struct wordsplit *wss,
flags |= wsp->ws_flags & WRDSF_CLOSURE;
}
- wss->ws_options = wsp->ws_options;
+ wss->ws_options = wsp->ws_options & ~WRDSO_MAXWORDS;
+ wss->ws_namechar = wsp->ws_namechar;
flags |= WRDSF_DELIM
| WRDSF_ALLOC_DIE
@@ -260,11 +270,7 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
if (!(wsp->ws_flags & WRDSF_NOCMD))
{
if (!wsp->ws_command)
- {
- _wsplt_seterr (wsp, WRDSE_USAGE);
- errno = EINVAL;
- return wsp->ws_errno;
- }
+ return _wsplt_seterr (wsp, WRDSE_USAGE);
}
if (wsp->ws_flags & WRDSF_SHOWDBG)
@@ -333,6 +339,14 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
wsp->ws_paramidx = wsp->ws_paramsiz = 0;
wsp->ws_parambuf = NULL;
+ if (wsp->ws_options & WRDSO_NAMECHAR)
+ {
+ if (wsp->ws_namechar[strcspn(wsp->ws_namechar, "${}*@-+?=")])
+ return _wsplt_seterr (wsp, WRDSE_USAGE);
+ }
+ else
+ wsp->ws_namechar = NULL;
+
wsp->ws_endp = 0;
wsp->ws_wordi = 0;
@@ -1387,7 +1401,7 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
if (ISVARBEG (str[0]))
{
for (i = 1; i < len; i++)
- if (!ISVARCHR (str[i]))
+ if (!is_name_char (wsp, str[i]))
break;
*pend = str + i - 1;
}
@@ -1429,21 +1443,12 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
&& (str[1] == '-'
&& ISDIGIT (str[2]))))) != 0))
{
+ int i0 = str[0] == '-' ? 1 : 0;
str++;
len--;
- for (i = str[0] == '-' ? 1 : 0; i < len; i++)
- {
- if (str[i] == ':')
+ for (i = i0; i < len; i++)
{
- size_t j;
-
- defstr = str + i + 1;
- if (find_closing_paren (str, i + 1, len, &j, "{}"))
- return _wsplt_seterr (wsp, WRDSE_CBRACE);
- *pend = str + j;
- break;
- }
- else if (str[i] == '}')
+ if (str[i] == '}')
{
defstr = NULL;
*pend = str + i;
@@ -1456,6 +1461,8 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
defstr = str + i;
if (find_closing_paren (str, i, len, &j, "{}"))
return _wsplt_seterr (wsp, WRDSE_CBRACE);
+ if (i > i0 + 1 && str[i-1] == ':')
+ i--;
*pend = str + j;
break;
}
@@ -1473,8 +1480,10 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
return expvar_recover (wsp, str - 1, ptail, pend, flg);
}
}
- else if (!ISVARCHR (str[i]))
+ else if (!is_name_char (wsp, str[i]))
{
+ if (str[i] == ':' && i + 1 < len && strchr ("-+?=", str[i+1]))
+ continue;
return expvar_recover (wsp, str - 1, ptail, pend, flg);
}
}
@@ -1495,13 +1504,6 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
i - its length
defstr - default replacement str */
- if (defstr && strchr("-+?=", defstr[0]) == 0)
- {
- rc = WRDSE_UNDEF;
- defstr = NULL;
- }
- else
- {
if (is_param)
{
if (param_idx >= 0 && param_idx < wsp->ws_paramc)
@@ -1543,7 +1545,6 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
free (value);
rc = WRDSE_UNDEF;
}
- }
switch (rc)
{
@@ -1628,7 +1629,7 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
}
else if (wsp->ws_flags & WRDSF_UNDEF)
{
- _wsplt_setctxerr (wsp, WRDSE_UNDEF, str, *pend - str + 1);
+ _wsplt_setctxerr (wsp, WRDSE_UNDEF, str, i);
return 1;
}
else
diff --git a/wordsplit.h b/wordsplit.h
index 3451979..7c14cea 100644
--- a/wordsplit.h
+++ b/wordsplit.h
@@ -52,6 +52,10 @@ struct wordsplit
const char *ws_comment; /* [Input] (WRDSF_COMMENT) Comment characters. */
const char *ws_escape[2]; /* [Input] (WRDSF_ESCAPE) Characters to be escaped
with backslash. */
+ const char *ws_namechar; /* [Input] (WRDSO_NAMECHAR) Characters that can
+ be parts of a variable name, in addition to
+ alphanumerics and underscore. */
+
void (*ws_alloc_die) (wordsplit_t *wsp);
/* [Input] (WRDSF_ALLOC_DIE) Function called when
out of memory. Must not return. */
@@ -247,6 +251,8 @@ struct wordsplit
/* Enable negative positional indices (${-1} is the last positional
parameter) */
#define WRDSO_PARAM_NEGIDX 0x00008000
+/* ws_namechar member is initialized */
+#define WRDSO_NAMECHAR 0x00010000
#define WRDSO_BSKEEP WRDSO_BSKEEP_WORD
#define WRDSO_OESC WRDSO_OESC_WORD
diff --git a/wsp.c b/wsp.c
index cea7980..75fd6f5 100644
--- a/wsp.c
+++ b/wsp.c
@@ -41,7 +41,14 @@ enum env_type
{
env_none, /* No environment */
env_null, /* Null environment */
- env_sys /* Use system environment */
+ env_sys, /* Use system environment */
+ env_extra /* Use small built-in "extra" environment */
+ };
+
+enum
+ {
+ MAX_F_ENV = 16,
+ MAX_X_ENV = 16
};
struct wsclosure
@@ -54,9 +61,14 @@ struct wsclosure
the argv array. The ws.ws_dooffs field gives
the number of such variables. Forces the
WRDSF_DOOFFS flag. */
- char **fenvbase; /* Environment for testing the ws_getenv function */
+ char *fenvbase[MAX_F_ENV+1];
+ /* Environment for testing the ws_getenv function */
int fenvidx; /* Number of variables in fenvbase */
- int fenvmax; /* Size of fenbase (entries) */
+
+ char *xenvbase[MAX_X_ENV+1];
+ /* Extra environment variables */
+ int xenvidx; /* Number of variables in xenvbase */
+
int append_start; /* First argument to append (index in argv) */
int append_count; /* Number of arguments to append */
};
@@ -110,7 +122,7 @@ getwsopt (int argc, char **argv, struct wsopt *wso, struct wsclosure *wsc)
{
if (strchr (opt, '='))
{
- assert (wsc->fenvidx < wsc->fenvmax - 1);
+ assert (wsc->fenvidx < MAX_F_ENV);
wsc->fenvbase[wsc->fenvidx++] = opt;
return 0;
}
@@ -118,6 +130,31 @@ getwsopt (int argc, char **argv, struct wsopt *wso, struct wsclosure *wsc)
return EOF;
}
+ if (strncmp (opt, "-D", 2) == 0)
+ {
+ char *asgn;
+
+ if (opt[2])
+ asgn = opt + 2;
+ else if (wsoptind == argc)
+ {
+ fprintf (stderr, "%s: missing arguments for -D\n",
+ progname);
+ exit (1);
+ }
+ else
+ asgn = argv[wsoptind++];
+
+ if (strchr (asgn, '='))
+ {
+ assert (wsc->xenvidx < MAX_F_ENV);
+ wsc->xenvbase[wsc->xenvidx++] = asgn;
+ return 0;
+ }
+ wsoptind--;
+ return EOF;
+ }
+
if (strcmp (opt, "--version") == 0)
{
print_version ();
@@ -306,6 +343,14 @@ setfn_maxwords (int flag, int neg, char *arg, struct wsclosure *wsc)
}
static void
+setfn_namechar (int flag, int neg, char *arg, struct wsclosure *wsc)
+{
+ wsc->wsflags |= WRDSF_OPTIONS;
+ wsc->ws.ws_options |= WRDSO_NAMECHAR;
+ wsc->ws.ws_namechar = arg;
+}
+
+static void
setfn_global (int flag, int neg, char *arg, struct wsclosure *wsc)
{
if (neg)
@@ -323,6 +368,8 @@ setfn_env (int flag, int neg, char *arg, struct wsclosure *wsc)
wsc->env_type = env_null;
else if (strcmp (arg, "sys") == 0)
wsc->env_type = env_sys;
+ else if (strcmp (arg, "extra") == 0)
+ wsc->env_type = env_extra;
else
{
fprintf (stderr, "%s: environment flag: %s\n", progname, arg);
@@ -400,6 +447,7 @@ struct wsopt opttab[] = {
{ "novarsplit", WRDSO_NOVARSPLIT, ws_boolean, setfn_option },
{ "nocmdsplit", WRDSO_NOCMDSPLIT, ws_boolean, setfn_option },
{ "maxwords", WRDSO_MAXWORDS, ws_required_argument, setfn_maxwords },
+ { "namechar", WRDSO_NAMECHAR, ws_required_argument, setfn_namechar },
/* String options */
{ "delim", WRDSF_DELIM, ws_required_argument, setfn_delim },
{ "comment", WRDSF_COMMENT,ws_required_argument, setfn_comment },
@@ -420,7 +468,7 @@ help (void)
{
size_t i;
- printf ("usage: %s [options] [VAR=VALUE...] [-- EXTRA...]\n", progname);
+ printf ("usage: %s [options] [-D VAR=VALUE ...] [VAR=VALUE...] [-- EXTRA...]\n", progname);
printf ("options are:\n");
for (i = 0; opttab[i].name; i++)
{
@@ -480,28 +528,28 @@ print_qword (const char *word, int plaintext)
/* Convert environment to K/V form */
static char **
-make_env_kv ()
+make_env_kv (char **origenv)
{
size_t i, j, size;
char **newenv;
/* Count the number of entries */
- for (i = 0; environ[i]; i++)
+ for (i = 0; origenv[i]; i++)
;
size = i * 2 + 1;
newenv = calloc (size, sizeof (newenv[0]));
assert (newenv != NULL);
- for (i = j = 0; environ[i]; i++)
+ for (i = j = 0; origenv[i]; i++)
{
- size_t len = strcspn (environ[i], "=");
+ size_t len = strcspn (origenv[i], "=");
char *p = malloc (len+1);
assert (p != NULL);
- memcpy (p, environ[i], len);
+ memcpy (p, origenv[i], len);
p[len] = 0;
newenv[j++] = p;
- p = strdup (environ[i] + len + 1);
+ p = strdup (origenv[i] + len + 1);
assert (p != NULL);
newenv[j++] = p;
}
@@ -626,7 +674,6 @@ int
main (int argc, char **argv)
{
struct wsclosure wsc;
- char *fenvbase[128];
char buf[1024], *ptr, *saved_ptr;
int next_call = 0;
@@ -634,9 +681,8 @@ main (int argc, char **argv)
wsc.wsflags = 0;
wsc.env_type = env_sys;
wsc.offarg = 0;
- wsc.fenvbase = fenvbase;
- wsc.fenvmax = sizeof (fenvbase) / sizeof (fenvbase[0]);
wsc.fenvidx = 0;
+ wsc.xenvidx = 0;
wsc.ws.ws_options = 0;
wsc.wsflags = (WRDSF_DEFFLAGS & ~WRDSF_NOVAR) |
WRDSF_ENOMEMABRT |
@@ -647,12 +693,14 @@ main (int argc, char **argv)
while (getwsopt (argc, argv, opttab, &wsc) != EOF)
;
+ wsc.fenvbase[wsc.fenvidx] = NULL;
+ wsc.xenvbase[wsc.xenvidx] = NULL;
+
if (wsc.fenvidx > 0)
{
- wsc.fenvbase[wsc.fenvidx] = NULL;
wsc.wsflags |= WRDSF_GETVAR | WRDSF_CLOSURE;
wsc.ws.ws_getvar = wsp_getvar;
- wsc.ws.ws_closure = fenvbase;
+ wsc.ws.ws_closure = wsc.fenvbase;
}
if (wsoptind < argc)
@@ -674,11 +722,45 @@ main (int argc, char **argv)
break;
case env_sys:
+ {
+ char **newenv;
+
+ if (wsc.xenvidx)
+ {
+ size_t i, j;
+ for (i = 0; environ[i]; i++)
+ ;
+ newenv = calloc (i + wsc.xenvidx + 1, sizeof (*newenv));
+ assert (newenv != NULL);
+ for (i = 0; environ[i]; i++)
+ {
+ newenv[i] = strdup (environ[i]);
+ assert (newenv[i] != NULL);
+ }
+ for (j = 0; j < wsc.xenvidx; j++, i++)
+ {
+ newenv[i] = strdup (wsc.xenvbase[j]);
+ assert (newenv[i] != NULL);
+ }
+ newenv[i] = NULL;
+ }
+ else
+ newenv = environ;
+
+ wsc.wsflags |= WRDSF_ENV;
+ if (wsc.wsflags & WRDSF_ENV_KV)
+ wsc.ws.ws_env = (const char **) make_env_kv (newenv);
+ else
+ wsc.ws.ws_env = (const char **) newenv;
+ }
+ break;
+
+ case env_extra:
wsc.wsflags |= WRDSF_ENV;
if (wsc.wsflags & WRDSF_ENV_KV)
- wsc.ws.ws_env = (const char **) make_env_kv ();
+ wsc.ws.ws_env = (const char **) make_env_kv (wsc.xenvbase);
else
- wsc.ws.ws_env = (const char **) environ;
+ wsc.ws.ws_env = (const char **) wsc.xenvbase;
break;
}

Return to:

Send suggestions and report system problems to the System administrator.