From 43e8d1ad415cd8af7f56f0346097dd2882e23628 Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Wed, 11 Mar 2009 12:35:52 +0200 Subject: Start writing wordsplit. * doc/wydawca.texi: Update. * gconf/argcv.h, gconf/argcv.c: Remove. * gconf/wordsplit.c, gconf/wordsplit.h: New files. * gconf/Makefile.am (libgconf_a_SOURCES): Remove argcv, add wordsplit * gconf/gconf-lex.l, gconf/gconf-preproc.c: Use wordsplit, instead of argcv_get * src/directive.c, src/method.c: Likewise. * src/wydawca.h: Likewise. * tests/wordsplit.at: New file. * tests/wstest.c, tests/wsbatch.c: New files. * tests/Makefile.am (TESTSUITE_AT): Add wordsplit.at Build wstest and wsbatch. * tests/testsuite.at: Add wordsplit.at. --- gconf/Makefile.am | 6 +- gconf/argcv.c | 632 -------------------------------------------------- gconf/argcv.h | 79 ------- gconf/gconf-lex.l | 43 ++-- gconf/gconf-preproc.c | 18 +- gconf/wordsplit.c | 568 +++++++++++++++++++++++++++++++++++++++++++++ gconf/wordsplit.h | 88 +++++++ 7 files changed, 689 insertions(+), 745 deletions(-) delete mode 100644 gconf/argcv.c delete mode 100644 gconf/argcv.h create mode 100644 gconf/wordsplit.c create mode 100644 gconf/wordsplit.h (limited to 'gconf') diff --git a/gconf/Makefile.am b/gconf/Makefile.am index 4f50bd6..afe06b0 100644 --- a/gconf/Makefile.am +++ b/gconf/Makefile.am @@ -1,13 +1,13 @@ noinst_LIBRARIES=libgconf.a libgconf_a_SOURCES = \ - argcv.c\ - argcv.h\ gconf-format.c\ gconf-gram.y\ gconf-lex.l\ gconf-preproc.c\ gconf-text.c\ - gconf.h + gconf.h\ + wordsplit.c\ + wordsplit.h EXTRA_DIST=gconf-gram.h diff --git a/gconf/argcv.c b/gconf/argcv.c deleted file mode 100644 index d5f2bda..0000000 --- a/gconf/argcv.c +++ /dev/null @@ -1,632 +0,0 @@ -/* argcv.c - simple functions for parsing input based on whitespace - Copyright (C) 1999, 2000, 2001, 2003, 2004, - 2005, 2006, 2008, 2009 Free Software Foundation, Inc. 
- - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General - Public License along with this library; if not, write to the - Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA 02110-1301 USA */ - -#ifdef HAVE_CONFIG_H -# include -#endif - -#include -#include -#include -#include -#include -#include - -enum argcv_quoting_style argcv_quoting_style; - -#define _ARGCV_WORD_SED_EXPR 0x1000 -#define _ARGCV_WORD_MASK 0xf000 - -/* - * takes a string and splits it into several strings, breaking at ' ' - * command is the string to split - * the number of strings is placed into argc - * the split strings are put into argv - * returns 0 on success, nonzero on failure - */ - -#define isws(c) ((c)==' '||(c)=='\t'||(c)=='\n') -#define isdelim(c,delim) (strchr(delim,(c))!=NULL) - -struct argcv_info -{ - int len; - const char *command; - const char *delim; - const char *comment; - int flags; - - int start; - int end; - int save; - int finish_pos; -}; - -static void -init_argcv_info (struct argcv_info *ap, int flags, - int len, const char *command, const char *delim, - const char *comment) -{ - memset (ap, 0, sizeof *ap); - ap->len = len; - ap->command = command; - ap->delim = delim; - ap->comment = comment; - ap->flags = flags; -} - -static int -skip_sed_expr(const char *command, int i, int len) -{ - int state; - - do - { - int delim; - - if (command[i] == ';') - i++; - if (!(command[i] == 's' && i + 3 < len && c_ispunct(command[i+1]))) - break; - - delim = 
command[++i]; - state = 1; - for (i++; i < len; i++) - { - if (state == 3) - { - if (command[i] == delim || !c_isalnum(command[i])) - break; - } - else if (command[i] == '\\') - i++; - else if (command[i] == delim) - state++; - } - } - while (state == 3 && i < len && command[i] == ';'); - i--; - return i; -} - -static int -argcv_scan (struct argcv_info *ap) -{ - int i = 0; - int len = ap->len; - const char *command = ap->command; - const char *delim = ap->delim; - const char *comment = ap->comment; - - for (;;) - { - i = ap->save; - - if (i >= len) - return i + 1; - - if (ap->flags & ARGCV_WS) - { - /* Skip initial whitespace */ - while (i < len && isws (command[i])) - i++; - } - ap->start = i; - - ap->flags &= ~_ARGCV_WORD_MASK; - - if (ap->flags & ARGCV_SED_EXPR - && command[i] == 's' && i + 3 < len && c_ispunct(command[i+1])) - { - ap->flags |= _ARGCV_WORD_SED_EXPR; - i = skip_sed_expr(command, i, len); - } - else if (!isdelim (command[i], delim)) - { - while (i < len) - { - if (ap->flags & ARGCV_QUOTE) - { - if (command[i] == '\\') - { - if (++i == len) - break; - i++; - continue; - } - - if (command[i] == '\'' || command[i] == '"') - { - int j; - for (j = i + 1; j < len && command[j] != command[i]; j++) - if (command[j] == '\\') - j++; - if (j < len) - i = j + 1; - else - i++; - continue; - } - } - if ((ap->flags & ARGCV_WS && isws (command[i])) - || isdelim (command[i], delim)) - break; - else - i++; - } - i--; - } - else if (!(ap->flags & ARGCV_RETURN_DELIMS)) - { - if (ap->flags & ARGCV_SQUEEZE_DELIMS) - while (i < len && isdelim (command[i], delim)) - i++; - else if (i < len) - i++; - - ap->save = i; - continue; - } - - - ap->end = i; - ap->save = ap->finish_pos = i + 1; - - /* If we have a token, and it starts with a comment character, skip - to the newline and restart the token search. 
*/ - if (ap->save <= len) - { - if (strchr (comment, command[ap->start]) != NULL) - { - ap->finish_pos = ap->start; - i = ap->save; - while (i < len && command[i] != '\n') - i++; - - ap->save = i; - continue; - } - } - break; - } - return ap->save; -} - -static char quote_transtab[] = "\\\\a\ab\bf\fn\nr\rt\tv\v"; - -int -argcv_unquote_char (int c) -{ - char *p; - - for (p = quote_transtab; *p; p += 2) - { - if (*p == c) - return p[1]; - } - return c; -} - -int -argcv_quote_char (int c) -{ - char *p; - - for (p = quote_transtab + sizeof(quote_transtab) - 2; - p > quote_transtab; p -= 2) - { - if (*p == c) - return p[-1]; - } - return -1; -} - -#define to_num(c) \ - (isdigit(c) ? c - '0' : (isxdigit(c) ? toupper(c) - 'A' + 10 : 255 )) - -static int -xtonum (int *pval, const char *src, int base, int cnt) -{ - int i, val; - - for (i = 0, val = 0; i < cnt; i++, src++) - { - int n = *(unsigned char*)src; - if (n > 127 || (n = to_num(n)) >= base) - break; - val = val*base + n; - } - *pval = val; - return i; -} - -size_t -argcv_quoted_length (const char *str, int *quote) -{ - size_t len = 0; - - *quote = 0; - for (; *str; str++) - { - if (*str == ' ') - { - len++; - *quote = 1; - } - else if (*str == '"') - { - len += 2; - *quote = 1; - } - else if (*str != '\t' && *str != '\\' && isprint (*str)) - len++; - else - { - switch (argcv_quoting_style) - { - case argcv_quoting_octal: - if (argcv_quote_char (*str) != -1) - len += 2; - else - len += 4; - break; - - case argcv_quoting_hex: - len += 3; - break; - } - } - } - return len; -} - -void -argcv_unquote_copy (char *dst, const char *src, size_t n) -{ - int i = 0; - int c; - int expect_delim = 0; - - while (i < n) - { - switch (src[i]) - { - case '\'': - case '"': - if (!expect_delim) - { - const char *p; - - for (p = src+i+1; *p && *p != src[i]; p++) - if (*p == '\\') - p++; - if (*p) - expect_delim = src[i++]; - else - *dst++ = src[i++]; - } - else if (expect_delim == src[i]) - ++i; - else - *dst++ = src[i++]; - break; - - 
case '\\': - ++i; - if (src[i] == 'x' || src[i] == 'X') - { - if (n - i < 2) - { - *dst++ = '\\'; - *dst++ = src[i++]; - } - else - { - int off = xtonum(&c, src + i + 1, 16, 2); - if (off == 0) - { - *dst++ = '\\'; - *dst++ = src[i++]; - } - else - { - *dst++ = c; - i += off + 1; - } - } - } - else if ((unsigned char)src[i] < 128 && isdigit (src[i])) - { - if (n - i < 1) - { - *dst++ = '\\'; - *dst++ = src[i++]; - } - else - { - int off = xtonum (&c, src+i, 8, 3); - if (off == 0) - { - *dst++ = '\\'; - *dst++ = src[i++]; - } - else - { - *dst++ = c; - i += off; - } - } - } - else - *dst++ = argcv_unquote_char (src[i++]); - break; - - default: - *dst++ = src[i++]; - } - } - *dst = 0; -} - -void -argcv_quote_copy (char *dst, const char *src) -{ - for (; *src; src++) - { - if (*src == '"') - { - *dst++ = '\\'; - *dst++ = *src; - } - else if (*src != '\t' && *src != '\\' && isprint(*src)) - *dst++ = *src; - else - { - char tmp[4]; - - switch (argcv_quoting_style) - { - case argcv_quoting_octal: - { - int c = argcv_quote_char (*src); - *dst++ = '\\'; - if (c != -1) - *dst++ = c; - else - { - snprintf (tmp, sizeof tmp, "%03o", *(unsigned char*)src); - memcpy (dst, tmp, 3); - dst += 3; - } - break; - } - - case argcv_quoting_hex: - snprintf (tmp, sizeof tmp, "%%%02X", *(unsigned char*)src); - memcpy (dst, tmp, 3); - dst += 3; - break; - } - } - } -} - -int -argcv_get_np (const char *command, int len, - const char *delim, const char *cmnt, - int flags, - int *pargc, char ***pargv, char **endp) -{ - int i = 0; - struct argcv_info info; - int argc; - char **argv; - - if (!delim) - delim = " "; - if (!cmnt) - cmnt = ""; - - init_argcv_info (&info, flags, len, command, delim, cmnt); - - /* Count number of arguments */ - argc = 0; - while (argcv_scan (&info) <= len) - argc++; - - argv = calloc ((argc + 1), sizeof (char *)); - if (argv == NULL) - return ENOMEM; - - i = 0; - info.save = 0; - for (i = 0; i < argc; i++) - { - int n; - int unquote; - - argcv_scan (&info); - - if 
(info.flags & ARGCV_QUOTE && !(info.flags & _ARGCV_WORD_SED_EXPR)) - { - if ((command[info.start] == '"' || command[info.end] == '\'') - && command[info.end] == command[info.start]) - { - if (info.start < info.end) - { - info.start++; - info.end--; - } - unquote = 0; - } - else - unquote = 1; - } - else - unquote = 0; - - n = info.end - info.start + 1; - argv[i] = calloc (n + 1, sizeof (char)); - if (argv[i] == NULL) - { - argcv_free (i, argv); - return ENOMEM; - } - if (unquote) - argcv_unquote_copy (argv[i], &command[info.start], n); - else - memcpy (argv[i], &command[info.start], n); - argv[i][n] = 0; - } - argv[i] = NULL; - - *pargc = argc; - *pargv = argv; - if (endp) - *endp = (char*) (command + info.finish_pos); - return 0; -} - -int -argcv_get_n (const char *command, int len, const char *delim, const char *cmnt, - int *pargc, char ***pargv) -{ - return argcv_get_np (command, len, delim, cmnt, ARGCV_DEFFLAGS, - pargc, pargv, NULL); -} - -int -argcv_get (const char *command, const char *delim, const char *cmnt, - int *argc, char ***argv) -{ - return argcv_get_np (command, strlen (command), delim, cmnt, - ARGCV_DEFFLAGS, - argc, argv, NULL); -} - - -/* - * frees all elements of an argv array - * argc is the number of elements - * argv is the array - */ -void -argcv_free (int argc, char **argv) -{ - if (argc) - { - while (--argc >= 0) - if (argv[argc]) - free (argv[argc]); - free (argv); - } -} - -void -argv_free (char **argv) -{ - int i; - - for (i = 0; argv[i]; i++) - free (argv[i]); - free (argv); -} - -/* Make a argv an make string separated by ' '. */ - -int -argcv_string (int argc, char **argv, char **pstring) -{ - size_t i, j, len; - char *buffer; - - /* No need. 
*/ - if (pstring == NULL) - return EINVAL; - - buffer = malloc (1); - if (buffer == NULL) - return ENOMEM; - *buffer = '\0'; - - for (len = i = j = 0; i < argc; i++) - { - int quote; - int toklen; - - toklen = argcv_quoted_length (argv[i], &quote); - - len += toklen + 2; - if (quote) - len += 2; - - buffer = realloc (buffer, len); - if (buffer == NULL) - return ENOMEM; - - if (i != 0) - buffer[j++] = ' '; - if (quote) - buffer[j++] = '"'; - argcv_quote_copy (buffer + j, argv[i]); - j += toklen; - if (quote) - buffer[j++] = '"'; - } - - for (; j > 0 && isspace (buffer[j-1]); j--) - ; - buffer[j] = 0; - if (pstring) - *pstring = buffer; - return 0; -} - -void -argcv_remove (int *pargc, char ***pargv, - int (*sel) (const char *, void *), void *data) -{ - int i, j; - int argc = *pargc; - char **argv = *pargv; - int cnt = 0; - - for (i = j = 0; i < argc; i++) - { - if (sel (argv[i], data)) - { - free (argv[i]); - cnt++; - } - else - { - if (i != j) - argv[j] = argv[i]; - j++; - } - } - if (i != j) - argv[j] = NULL; - argc -= cnt; - - *pargc = argc; - *pargv = argv; -} - - diff --git a/gconf/argcv.h b/gconf/argcv.h deleted file mode 100644 index 88bcb90..0000000 --- a/gconf/argcv.h +++ /dev/null @@ -1,79 +0,0 @@ -/* GNU Mailutils -- a suite of utilities for electronic mail - Copyright (C) 1999, 2000, 2001, 2005, 2007, - 2008, 2009 Free Software Foundation, Inc. - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General - Public License along with this library; if not, write to the - Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA 02110-1301 USA */ - -#ifndef _ARGCV_H -#define _ARGCV_H 1 - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - - /* Treat whitespace as delimiters */ -#define ARGCV_WS 0x01 - /* Handle quotes and escape directives */ -#define ARGCV_QUOTE 0x02 - /* replace each input sequence of repeated delimiters into a single - delimiter */ -#define ARGCV_SQUEEZE_DELIMS 0x04 - /* Return delimiters */ -#define ARGCV_RETURN_DELIMS 0x08 - /* Treat sed expressions as words */ -#define ARGCV_SED_EXPR 0x10 - -#define ARGCV_DEFFLAGS \ - (ARGCV_WS | ARGCV_QUOTE | ARGCV_SQUEEZE_DELIMS | ARGCV_RETURN_DELIMS) - -enum argcv_quoting_style { - argcv_quoting_octal, - argcv_quoting_hex -}; - -extern enum argcv_quoting_style argcv_quoting_style; - -extern int argcv_get (const char *command, const char *delim, - const char *cmnt, - int *argc, char ***argv); -extern int argcv_get_n (const char *command, int len, - const char *delim, const char *cmnt, - int *argc, char ***argv); -extern int argcv_get_np (const char *command, int len, - const char *delim, const char *cmnt, - int flags, - int *pargc, char ***pargv, char **endp); - -extern int argcv_string (int argc, char **argv, char **string); -extern void argcv_free (int argc, char **argv); -extern void argv_free (char **argv); - -extern int argcv_unquote_char (int c); -extern int argcv_quote_char (int c); -extern size_t argcv_quoted_length (const char *str, int *quote); -extern void argcv_unquote_copy (char *dst, const char *src, size_t n); -extern void argcv_quote_copy (char *dst, const char *src); -extern void argcv_remove (int *pargc, char ***pargv, - int (*sel) (const char *, void *), void *); - -#ifdef __cplusplus -} -#endif - -#endif /* _ARGCV_H */ diff --git a/gconf/gconf-lex.l b/gconf/gconf-lex.l index 1cdaada..1379640 
100644 --- a/gconf/gconf-lex.l +++ b/gconf/gconf-lex.l @@ -31,7 +31,7 @@ #define obstack_chunk_free free #include #include -#include +#include #if ENABLE_NLS # include "gettext.h" @@ -423,28 +423,23 @@ static void parse_line (char *text, gconf_locus_t *ploc, size_t *pxlines) { int rc = 1; - int argc; - char **argv; - - while (*text && isspace (*text)) - text++; - text++; + struct wordsplit ws; - if (argcv_get (text, "", NULL, &argc, &argv)) + if (wordsplit (text, &ws, WRDSF_DEFFLAGS)) gconf_error (ploc, 0, _("cannot parse #line line")); else { - if (argc == 2) - rc = assign_locus (ploc, NULL, argv[1], pxlines); - else if (argc == 3) - rc = assign_locus (ploc, argv[2], argv[1], pxlines); - else if (argc == 4) + if (ws.ws_wordc == 2) + rc = assign_locus (ploc, NULL, ws.ws_wordv[1], pxlines); + else if (ws.ws_wordc == 3) + rc = assign_locus (ploc, ws.ws_wordv[2], ws.ws_wordv[1], pxlines); + else if (ws.ws_wordc == 4) { - rc = assign_locus (ploc, argv[2], argv[1], 0); + rc = assign_locus (ploc, ws.ws_wordv[2], ws.ws_wordv[1], 0); if (rc == 0) { char *p; - unsigned long x = strtoul (argv[3], &p, 10); + unsigned long x = strtoul (ws.ws_wordv[3], &p, 10); rc = *p != 0; if (rc == 0) *pxlines = x; @@ -455,25 +450,27 @@ parse_line (char *text, gconf_locus_t *ploc, size_t *pxlines) if (rc) gconf_error (ploc, 0, _("malformed #line statement")); + wordsplit_free (&ws); } - argcv_free (argc, argv); } static void parse_line_cpp (char *text, gconf_locus_t *ploc, size_t *pxlines) { - int argc; - char **argv; + struct wordsplit ws; - if (argcv_get (text, "", NULL, &argc, &argv)) - gconf_error (ploc, 0, _("cannot parse #line line")); - else if (argc < 3) + if (wordsplit (text, &ws, WRDSF_DEFFLAGS)) + { + gconf_error (ploc, 0, _("cannot parse #line line")); + return; + } + else if (ws.ws_wordc < 3) gconf_error (ploc, 0, _("invalid #line statement")); else { - if (assign_locus (ploc, argv[2], argv[1], pxlines)) + if (assign_locus (ploc, ws.ws_wordv[2], ws.ws_wordv[1], pxlines)) 
gconf_error (ploc, 0, _("malformed #line statement")); } - argcv_free (argc, argv); + wordsplit_free (&ws); } diff --git a/gconf/gconf-preproc.c b/gconf/gconf-preproc.c index eb50474..5bd0a95 100644 --- a/gconf/gconf-preproc.c +++ b/gconf/gconf-preproc.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #if ENABLE_NLS # include "gettext.h" @@ -532,22 +532,24 @@ try_file (const char *name, int allow_cwd, int err_not_found, char **newp) static int parse_include (const char *text, int once) { - int argc; - char **argv; + struct wordsplit ws; char *tmp = NULL; char *p = NULL; int rc = 1; - if (argcv_get (text, "", NULL, &argc, &argv)) + if (wordsplit (text, &ws, WRDSF_DEFFLAGS)) gconf_error (&LOCUS, 0, _("Cannot parse include line")); - else if (argc != 2) - gconf_error (&LOCUS, 0, _("invalid include statement")); + else if (ws.ws_wordc != 2) + { + wordsplit_free (&ws); + gconf_error (&LOCUS, 0, _("invalid include statement")); + } else { size_t len; int allow_cwd; - p = argv[1]; + p = ws.ws_wordv[1]; len = strlen (p); if (p[0] == '<' && p[len - 1] == '>') @@ -566,7 +568,7 @@ parse_include (const char *text, int once) if (p) rc = push_source (p, once); free (tmp); - argcv_free (argc, argv); + wordsplit_free (&ws); return rc; } diff --git a/gconf/wordsplit.c b/gconf/wordsplit.c new file mode 100644 index 0000000..a5bb13e --- /dev/null +++ b/gconf/wordsplit.c @@ -0,0 +1,568 @@ +/* wordsplit - a word splitter + Copyright (C) 2009 Sergey Poznyakoff + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3 of the License, or (at your + option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program. If not, see . */ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include +#include +#include +#include +#include +#include + +#include +#include +#define _(msgid) gettext (msgid) +#include + +#define isws(c) ((c)==' '||(c)=='\t'||(c)=='\n') +#define isdelim(c,delim) (strchr(delim,(c))!=NULL) + +#define _ARGCV_WORD_SED_EXPR 0x10000 +#define _ARGCV_WORD_MASK 0xf0000 + +#define ALLOC_INIT 128 +#define ALLOC_INCR 128 + +static int +wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, + int flags) +{ + wsp->ws_flags = flags; + if ((wsp->ws_flags & (WRDSF_NOVAR|WRDSF_NOCMD)) + != (WRDSF_NOVAR|WRDSF_NOCMD)) + { + if (wsp->ws_flags & WRDSF_SHOWERR) + error (0, 0, + _("variable expansion and command substitution " + "are not yet supported")); + errno = EINVAL; + return 1; + } + + wsp->ws_input = input; + wsp->ws_len = len; + + if (!(wsp->ws_flags & WRDSF_DOOFFS)) + wsp->ws_offs = 0; + + if (!(wsp->ws_flags & WRDSF_DELIM)) + wsp->ws_delim = " "; + + if (!(wsp->ws_flags & WRDSF_COMMENT)) + wsp->ws_comment = NULL; + + if (wsp->ws_flags & WRDSF_REUSE) + { + wsp->ws_wordn = wsp->ws_wordc + 1; + if (wsp->ws_flags & WRDSF_DOOFFS) + wsp->ws_wordn += wsp->ws_offs; + if (!(wsp->ws_flags & WRDSF_APPEND)) + wsp->ws_wordc = 0; + } + else + { + wsp->ws_wordv = NULL; + wsp->ws_wordc = 0; + wsp->ws_wordn = 0; + } + + wsp->ws_endp = 0; + return 0; +} + +static int +alloc_space (struct wordsplit *wsp) +{ + size_t offs = (wsp->ws_flags & WRDSF_DOOFFS) ? 
wsp->ws_offs : 0; + char **ptr; + size_t newalloc; + + if (wsp->ws_wordv == NULL) + { + newalloc = offs + ALLOC_INIT; + ptr = calloc (newalloc, sizeof (ptr[0])); + } + else if (wsp->ws_wordn < offs + wsp->ws_wordc + 1) + { + newalloc = offs + wsp->ws_wordc + ALLOC_INCR; + ptr = realloc (wsp->ws_wordv, newalloc * sizeof (ptr[0])); + } + else + return 0; + + if (ptr) + { + wsp->ws_wordn = newalloc; + wsp->ws_wordv = ptr; + } + else + { + if (wsp->ws_flags & WRDSF_ENOMEMABRT) + xalloc_die (); + else if (wsp->ws_flags & WRDSF_SHOWERR) + error (0, 0, _("memory exhausted")); + errno = ENOMEM; + return 1; + } + return 0; +} + +static int +skip_sed_expr(const char *command, size_t i, size_t len) +{ + int state; + + do + { + int delim; + + if (command[i] == ';') + i++; + if (!(command[i] == 's' && i + 3 < len && c_ispunct(command[i+1]))) + break; + + delim = command[++i]; + state = 1; + for (i++; i < len; i++) + { + if (state == 3) + { + if (command[i] == delim || !c_isalnum(command[i])) + break; + } + else if (command[i] == '\\') + i++; + else if (command[i] == delim) + state++; + } + } + while (state == 3 && i < len && command[i] == ';'); + return i; +} + +static size_t +skip_delim (struct wordsplit *wsp) +{ + size_t start = wsp->ws_endp; + if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS) + { + do + start++; + while (start < wsp->ws_len + && isdelim (wsp->ws_input[start], wsp->ws_delim)); + start--; + } + + if (!(wsp->ws_flags & WRDSF_RETURN_DELIMS)) + start++; + + return start; +} + +#define _WRDS_WORD 1 +#define _WRDS_CONT 2 + +static int +scan_word (struct wordsplit *wsp, size_t *pstart, size_t *pend) +{ + size_t start = *pstart; + size_t len = wsp->ws_len; + const char *command = wsp->ws_input; + const char *delim = wsp->ws_delim; + const char *comment = wsp->ws_comment; + + size_t i = start; + + if (i >= len) + return WRDSE_EOF; + + if (wsp->ws_flags & WRDSF_WS) + { + /* Skip initial whitespace */ + while (i < len && isws (command[i])) + i++; + } + + start = i; + + 
wsp->ws_flags &= ~_ARGCV_WORD_MASK; + + if (wsp->ws_flags & WRDSF_SED_EXPR + && command[i] == 's' && i + 3 < len && c_ispunct (command[i+1])) + { + wsp->ws_flags |= _ARGCV_WORD_SED_EXPR; + i = skip_sed_expr (command, i, len); + } + else if (!isdelim (command[i], delim)) + { + while (i < len) + { + if (comment && strchr (comment, command[i]) != NULL) + { + size_t j; + for (j = i + 1; j < len && command[j] != '\n'; j++) + ; + *pstart = start; + *pend = i; + wsp->ws_endp = j; + return i > start ? _WRDS_WORD : _WRDS_CONT; + } + + if (wsp->ws_flags & WRDSF_QUOTE) + { + if (command[i] == '\\') + { + if (++i == len) + break; + i++; + continue; + } + + if (command[i] == '\'' || command[i] == '"') + { + size_t j; + for (j = i + 1; j < len && command[j] != command[i]; j++) + if (command[j] == '\\') + j++; + if (j < len && command[j] == command[i]) + i = j + 1; + else + { + wsp->ws_endp = i; + if (wsp->ws_flags & WRDSF_SHOWERR) + error (0, 0, + _("missing closing %c (start near #%lu)"), + command[i], (unsigned long) i); + return WRDSE_QUOTE; + } + } + } + + if (((wsp->ws_flags & WRDSF_WS) && isws (command[i])) + || isdelim (command[i], delim)) + break; + else + i++; + } + } + else if (wsp->ws_flags & WRDSF_RETURN_DELIMS) + i++; + + *pstart = start; + *pend = i; + wsp->ws_endp = i; + + return _WRDS_WORD; +} + +static char quote_transtab[] = "\\\\a\ab\bf\fn\nr\rt\tv\v"; + +int +wordsplit_unquote_char (int c) +{ + char *p; + + for (p = quote_transtab; *p; p += 2) + { + if (*p == c) + return p[1]; + } + return c; +} + +int +wordsplit_quote_char (int c) +{ + char *p; + + for (p = quote_transtab + sizeof(quote_transtab) - 2; + p > quote_transtab; p -= 2) + { + if (*p == c) + return p[-1]; + } + return -1; +} + +#define to_num(c) \ + (isdigit(c) ? c - '0' : (isxdigit(c) ? 
toupper(c) - 'A' + 10 : 255 )) + +static int +xtonum (int *pval, const char *src, int base, int cnt) +{ + int i, val; + + for (i = 0, val = 0; i < cnt; i++, src++) + { + int n = *(unsigned char*)src; + if (n > 127 || (n = to_num(n)) >= base) + break; + val = val*base + n; + } + *pval = val; + return i; +} + +size_t +wordsplit_quoted_length (const char *str, int quote_hex, int *quote) +{ + size_t len = 0; + + *quote = 0; + for (; *str; str++) + { + if (*str == ' ') + { + len++; + *quote = 1; + } + else if (*str == '"') + { + len += 2; + *quote = 1; + } + else if (*str != '\t' && *str != '\\' && isprint (*str)) + len++; + else if (quote_hex) + len += 3; + else + { + if (wordsplit_quote_char (*str) != -1) + len += 2; + else + len += 4; + } + } + return len; +} + +void +wordsplit_unquote_copy (char *dst, const char *src, size_t n) +{ + int i = 0; + int c; + int expect_delim = 0; + + while (i < n) + { + switch (src[i]) + { + case '\'': + case '"': + if (!expect_delim) + { + const char *p; + + for (p = src+i+1; *p && *p != src[i]; p++) + if (*p == '\\') + p++; + if (*p) + expect_delim = src[i++]; + else + *dst++ = src[i++]; + } + else if (expect_delim == src[i]) + ++i; + else + *dst++ = src[i++]; + break; + + case '\\': + ++i; + if (src[i] == 'x' || src[i] == 'X') + { + if (n - i < 2) + { + *dst++ = '\\'; + *dst++ = src[i++]; + } + else + { + int off = xtonum(&c, src + i + 1, 16, 2); + if (off == 0) + { + *dst++ = '\\'; + *dst++ = src[i++]; + } + else + { + *dst++ = c; + i += off + 1; + } + } + } + else if ((unsigned char)src[i] < 128 && isdigit (src[i])) + { + if (n - i < 1) + { + *dst++ = '\\'; + *dst++ = src[i++]; + } + else + { + int off = xtonum (&c, src+i, 8, 3); + if (off == 0) + { + *dst++ = '\\'; + *dst++ = src[i++]; + } + else + { + *dst++ = c; + i += off; + } + } + } + else + *dst++ = wordsplit_unquote_char (src[i++]); + break; + + default: + *dst++ = src[i++]; + } + } + *dst = 0; +} + +void +wordsplit_quote_copy (char *dst, const char *src, int quote_hex) +{ 
+ for (; *src; src++) + { + if (*src == '"') + { + *dst++ = '\\'; + *dst++ = *src; + } + else if (*src != '\t' && *src != '\\' && isprint (*src)) + *dst++ = *src; + else + { + char tmp[4]; + + if (quote_hex) + { + snprintf (tmp, sizeof tmp, "%%%02X", *(unsigned char*)src); + memcpy (dst, tmp, 3); + dst += 3; + } + else + { + int c = wordsplit_quote_char (*src); + *dst++ = '\\'; + if (c != -1) + *dst++ = c; + else + { + snprintf (tmp, sizeof tmp, "%03o", *(unsigned char*)src); + memcpy (dst, tmp, 3); + dst += 3; + } + } + } + } +} + +int +wordsplit_len (const char *command, size_t len, struct wordsplit *wsp, + int flags) +{ + int rc; + size_t start = 0, end = 0; + + rc = wordsplit_init (wsp, command, len, flags); + if (rc) + return rc; + + for (; (rc = scan_word (wsp, &start, &end)) > 0; start = skip_delim (wsp)) + { + int unquote = 1; + size_t n; + char *p; + + if (rc == _WRDS_CONT) + continue; + + if (alloc_space (wsp)) + return WRDSE_NOSPACE; + + n = end - start; + + if (wsp->ws_flags & WRDSF_QUOTE && + !(wsp->ws_flags & _ARGCV_WORD_SED_EXPR)) + { + if (start < end + && (command[start] == '"' || command[start] == '\'') + && command[end-1] == command[start]) + { + unquote = command[start] == '"'; + start++; + n -= 2; + } + } + else + unquote = 0; + + p = malloc (n + 1); + if (!p) + { + if (wsp->ws_flags & WRDSF_ENOMEMABRT) + xalloc_die (); + if (wsp->ws_flags & WRDSF_SHOWERR) + error (0, 0, _("memory exhausted")); + if (!(wsp->ws_flags & WRDSF_REUSE)) + wordsplit_free (wsp); + errno = ENOMEM; + return WRDSE_NOSPACE; + } + + if (unquote) + wordsplit_unquote_copy (p, &command[start], n); + else + { + memcpy (p, &command[start], n); + p[n] = 0; + } + wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = p; + wsp->ws_wordc++; + + ; + } + if (alloc_space (wsp)) + return WRDSE_NOSPACE; + wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL; + /* FIXME: if (rc) free(ws) */ + return rc; +} + +int +wordsplit (const char *command, struct wordsplit *ws, int flags) +{ + return 
wordsplit_len (command, strlen (command), ws, flags); +} + +void +wordsplit_free (struct wordsplit *ws) +{ + free (ws->ws_wordv); + ws->ws_wordv = NULL; +} + + + diff --git a/gconf/wordsplit.h b/gconf/wordsplit.h new file mode 100644 index 0000000..d4d1f0c --- /dev/null +++ b/gconf/wordsplit.h @@ -0,0 +1,88 @@ +/* wordsplit - a word splitter + Copyright (C) 2009 Sergey Poznyakoff + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3 of the License, or (at your + option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program. If not, see . */ + +struct wordsplit +{ + size_t ws_wordc; + char **ws_wordv; + size_t ws_offs; + size_t ws_wordn; + int ws_flags; + const char *ws_delim; + const char *ws_comment; + + const char *ws_input; + size_t ws_len; + size_t ws_endp; +}; + +/* Append the words found to the array resulting from a previous + call. */ +#define WRDSF_APPEND 0x0001 +/* Insert we_offs initial NULLs in the array ws_wordv. + (These are not counted in the returned ws_wordc.) */ +#define WRDSF_DOOFFS 0x0002 +/* Don't do command substitution. Reserved for future use. */ +#define WRDSF_NOCMD 0x0004 +/* The parameter p resulted from a previous call to + wordsplit(), and wordsplit_free() was not called. Reuse the + allocated storage. */ +#define WRDSF_REUSE 0x0008 +/* Print errors */ +#define WRDSF_SHOWERR 0x0010 +/* Consider it an error if an undefined shell variable + is expanded. */ +#define WRDSF_UNDEF 0x0020 + +/* Don't do variable expansion. Reserved for future use. 
*/ +#define WRDSF_NOVAR 0x0040 +/* Abort on ENOMEM error */ +#define WRDSF_ENOMEMABRT 0x0080 +/* Treat whitespace as delimiters */ +#define WRDSF_WS 0x0100 +/* Handle quotes and escape directives */ +#define WRDSF_QUOTE 0x0200 +/* Replace each input sequence of repeated delimiters with a single + delimiter */ +#define WRDSF_SQUEEZE_DELIMS 0x0400 +/* Return delimiters */ +#define WRDSF_RETURN_DELIMS 0x0800 +/* Treat sed expressions as words */ +#define WRDSF_SED_EXPR 0x1000 +/* ws_delim field is initialized */ +#define WRDSF_DELIM 0x2000 +/* ws_comment field is initialized */ +#define WRDSF_COMMENT 0x4000 + +#define WRDSF_DEFFLAGS \ + (WRDSF_NOVAR | WRDSF_NOCMD | \ + WRDSF_WS | WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS) + +#define WRDSE_EOF 0 +#define WRDSE_QUOTE 1 +#define WRDSE_NOSPACE 2 + +int wordsplit (const char *s, struct wordsplit *p, int flags); +void wordsplit_free (struct wordsplit *p); + +int wordsplit_unquote_char (int c); +int wordsplit_quote_char (int c); +size_t wordsplit_quoted_length (const char *str, int quote_hex, int *quote); +void wordsplit_unquote_copy (char *dst, const char *src, size_t n); +void wordsplit_quote_copy (char *dst, const char *src, int quote_hex); + + + -- cgit v1.2.1