diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2018-02-12 09:24:33 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2018-02-12 09:40:03 +0200 |
commit | 2fa19a04b42557ffc78e47a7d53f2911553c8124 (patch) | |
tree | 48d16c1473009e42e1080d5f0a33fbecbed771bd | |
parent | bcd0331d29201fc749fb1edaeede1ca035f13b2d (diff) | |
download | fileserv-2fa19a04b42557ffc78e47a7d53f2911553c8124.tar.gz fileserv-2fa19a04b42557ffc78e47a7d53f2911553c8124.tar.bz2 |
Introduce per-directory configuration files.
* src/fileserv.c: Use sys/queue.h to maintain lists. The header (or
an equivalent of it) will be included in the sources in the future.
(index_file): Remove.
(get_file_name): Rewrite. Take into account directory configuration.
* src/fileserv.h (DIRCONFIG): New data type.
(DIRCONFIG_INITIALIZER): New macro.
(xstrdup)
(dirconfig, dirconfig_parse)
(dirconfig_init, dirconfig_free)
(catfile_n, catfile): New functions.
* src/Makefile.am: Add new sources.
* src/catfile.c: New file.
* src/dirconfig.c: New file.
* src/wordsplit.c: New file.
* src/wordsplit.h: New file.
-rw-r--r-- | src/Makefile.am | 3 | ||||
-rw-r--r-- | src/catfile.c | 29 | ||||
-rw-r--r-- | src/dirconfig.c | 232 | ||||
-rw-r--r-- | src/fileserv.c | 169 | ||||
-rw-r--r-- | src/fileserv.h | 19 | ||||
-rw-r--r-- | src/wordsplit.c | 2383 | ||||
-rw-r--r-- | src/wordsplit.h | 261 |
7 files changed, 3023 insertions, 73 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index b639e8b..b9dcfe8 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,5 +1,6 @@ bin_PROGRAMS=fileserv -fileserv_SOURCES=fileserv.c runas.c fileserv.h logger.c pidfile.c +fileserv_SOURCES=fileserv.c runas.c fileserv.h logger.c pidfile.c\ + wordsplit.c wordsplit.h catfile.c dirconfig.c if FSRV_WRAP fileserv_SOURCES += wrapacl.c endif diff --git a/src/catfile.c b/src/catfile.c new file mode 100644 index 0000000..56ee0a9 --- /dev/null +++ b/src/catfile.c @@ -0,0 +1,29 @@ +#include <stdlib.h> +#include <string.h> +#include "fileserv.h" + +char * +catfile_n(char const *dir, size_t len, char const *file) +{ + size_t sz; + char *p; + + sz = len; + if (sz > 0 && dir[sz-1] != '/') + sz++; + if (*file == '/') + file++; + sz += strlen(file); + p = xmalloc(sz + 1); + memcpy(p, dir, len); + if (p[len-1] != '/') + p[len++] = '/'; + strcpy(p + len, file); + return p; +} + +char * +catfile(char const *dir, char const *file) +{ + return catfile_n(dir, strlen(dir), file); +} diff --git a/src/dirconfig.c b/src/dirconfig.c new file mode 100644 index 0000000..a761401 --- /dev/null +++ b/src/dirconfig.c @@ -0,0 +1,232 @@ +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <sys/stat.h> +#include "fileserv.h" +#include "wordsplit.h" + +char *dotfile = ".fileserv"; + +DIRCONFIG * +dirconfig(char const *path, size_t prefix_len) +{ + DIRCONFIG *conf = dirconfig_init(); + char *tmp = NULL; + struct stat st; + + if (stat(path, &st) == 0 && S_ISDIR(st.st_mode)) { + tmp = catfile(path, "."); + path = tmp; + } + while (path[prefix_len]) { + char *name = catfile_n(path, prefix_len, dotfile); + dirconfig_parse(name, conf); + free(name); + if (path[prefix_len] == '/') + prefix_len++; + while (path[prefix_len] && path[prefix_len] != '/') + prefix_len++; + } + free(tmp); + return conf; +} + +static int +bool_decode(char const *s) +{ + static char *yes[] = { + "1", "yes", "true", "on", NULL + }; + static 
char *no[] = { + "0", "no", "false", "off", NULL + }; + int i; + for (i = 0; yes[i]; i++) { + if (strcasecmp(s, yes[i]) == 0) + return 1; + } + for (i = 0; no[i]; i++) { + if (strcasecmp(s, no[i]) == 0) + return 0; + } + return -1; +} + +static int +bad_argc(const char *file, int line, char const *id) +{ + error("%s:%d: bad number of arguments to '%s'", file, line, id); + return 1; +} + +static int +not_a_bool(const char *file, int line) +{ + error("%s:%d: invalid boolean value", file, line); + return 1; +} + +static int +set_follow(size_t argc, char **argv, DIRCONFIG *conf, + char const *file, int line) +{ + int n; + if (argc != 2) + return bad_argc(file, line, argv[0]); + n = bool_decode(argv[1]); + if (n == -1) + return not_a_bool(file, line); + conf->follow = n; + return 0; +} + +static int +set_listing(size_t argc, char **argv, DIRCONFIG *conf, + char const *file, int line) +{ + int n; + if (argc != 2) + return bad_argc(file, line, argv[0]); + n = bool_decode(argv[1]); + if (n == -1) + return not_a_bool(file, line); + conf->listing = n; + return 0; +} + +static int +set_list_unreadable(size_t argc, char **argv, DIRCONFIG *conf, + char const *file, int line) +{ + int n; + if (argc != 2) + return bad_argc(file, line, argv[0]); + n = bool_decode(argv[1]); + if (n == -1) + return not_a_bool(file, line); + conf->list_unreadable = n; + return 0; +} + +static int +set_index_files(size_t argc, char **argv, DIRCONFIG *conf, + char const *file, int line) +{ + size_t i; + + if (argc < 2) + return bad_argc(file, line, argv[0]); + conf->index_files = xcalloc(argc, sizeof(conf->index_files)); + for (i = 1; i < argc; i++) + conf->index_files[i-1] = xstrdup(argv[i]); + conf->index_files[i-1] = NULL; + return 0; +} + +struct dirconfig_keyword { + char const *ident; + int (*setter)(size_t argc, char **argv, DIRCONFIG *conf, + char const *file, int line); +}; + +static struct dirconfig_keyword keywords[] = { + { "directory-index", set_index_files }, + { "follow", set_follow }, + 
{ "listing", set_listing }, + { "list-unreadable", set_list_unreadable }, + { NULL } +}; + +static int +line_interpret(struct wordsplit const *ws, + DIRCONFIG *conf, char const *file, int line) +{ + struct dirconfig_keyword *kw; + for (kw = keywords; kw->ident; kw++) { + if (strcmp(kw->ident, ws->ws_wordv[0]) == 0) + return kw->setter(ws->ws_wordc, ws->ws_wordv, + conf, file, line); + } + error("%s:%d: unrecognized keyword", file, line); + return 1; +} + +void +dirconfig_parse(const char *file, DIRCONFIG *conf) +{ + FILE *fp; + char buf[1024]; + unsigned ln = 0; + int wsflags = WRDSF_DEFFLAGS; + struct wordsplit ws; + + fp = fopen(file, "r"); + if (!fp) { + if (errno != ENOENT) + error("can't open file %s: %s", file, + strerror(errno)); + return; + } + + ws.ws_comment = "#"; + wsflags |= WRDSF_COMMENT; + + while (fgets(buf, sizeof(buf), fp)) { + int len; + + ln++; + len = strlen(buf); + if (len == 0) + continue; + if (buf[len-1] == '\n') + buf[--len] = 0; + else if (!feof(fp)) { + error("%s:%d: line too long", file, ln); + break; + } + + if (wordsplit(buf, &ws, wsflags)) { + error("%s:%d: %s", file, ln, wordsplit_strerror(&ws)); + break; + } + + wsflags |= WRDSF_REUSE; + if (ws.ws_wordc == 0) + continue; + if (line_interpret(&ws, conf, file, ln)) { + break; + } + } + + if (wsflags & WRDSF_REUSE) + wordsplit_free(&ws); + + if (ferror(fp)) { + error("%s: %s", file, strerror(errno)); + } + fclose(fp); +} + +DIRCONFIG * +dirconfig_init(void) +{ + DIRCONFIG *p = xcalloc(1, sizeof(*p)); + p->list_unreadable = 1; + return p; +} + +void +dirconfig_free(DIRCONFIG *conf) +{ + if (conf->index_files) { + size_t i; + for (i = 0; conf->index_files[i]; i++) + free(conf->index_files[i]); + free(conf->index_files); + } + free(conf); +} + + + diff --git a/src/fileserv.c b/src/fileserv.c index 0749ab4..dc218de 100644 --- a/src/fileserv.c +++ b/src/fileserv.c @@ -36,14 +36,13 @@ #include "fileserv.h" #include "mimetypes.h" +#include <sys/queue.h> char *progname; int verbose; /* 
reserved for future use */ char *address = "0.0.0.0"; char *forwarded_header = "X-Forwarded-For"; -char *index_file; - #ifndef DEFAULT_SERVICE # define DEFAULT_SERVICE "8080" #endif @@ -97,6 +96,15 @@ xrealloc(void *ptr, size_t size) return ptr; } +char * +xstrdup(char const *str) +{ + char *ptr = strdup(str); + if (!ptr) + xmalloc_fail(); + return ptr; +} + void usage(void) { @@ -188,19 +196,18 @@ struct urimap { size_t uri_len; char *dir; size_t dir_len; - int dir_index; - struct urimap *next; + TAILQ_ENTRY(urimap) next; }; -struct urimap *map_head, *map_tail; +TAILQ_HEAD(,urimap) map_head = TAILQ_HEAD_INITIALIZER(map_head); struct urimap const * urimap_find(char const *host, char const *url) { struct urimap *map; size_t len = strlen(url); - - for (map = map_head; map; map = map->next) { + + TAILQ_FOREACH(map, &map_head, next) { if (map->host && strcasecmp(map->host, host)) continue; if (map->uri_len > len) @@ -217,8 +224,8 @@ urimap_find_dir(char const *host, char const *filename) { struct urimap *map; size_t len = strlen(filename); - - for (map = map_head; map; map = map->next) { + + TAILQ_FOREACH(map, &map_head, next) { if (map->host && strcasecmp(map->host, host)) continue; if (map->dir_len > len) @@ -266,12 +273,7 @@ urimap_add(char *arg) map->dir[--map->dir_len] = 0; } - map->next = NULL; - if (map_tail) - map_tail->next = map; - else - map_head = map; - map_tail = map; + TAILQ_INSERT_TAIL(&map_head, map, next); } static char * @@ -281,46 +283,94 @@ cfname(char const *fname) return realpath(fname, NULL); } + +static int +find_index_file(char const *dir, DIRCONFIG *conf, char **index_file, + struct stat *pst) +{ + char *cf; + + if (conf->index_files) { + int i; + int lasterr = 0; + for (i = 0; conf->index_files[i]; i++) { + struct stat st; + cf = catfile(dir, conf->index_files[i]); + if (access(cf, F_OK) || lstat(cf, &st)) { + free(cf); + lasterr = errno; + continue; + } + if (S_ISDIR(st.st_mode)) { + free(cf); + lasterr = ENOENT; + continue; + } + 
*index_file = cf; + *pst = st; + return MHD_HTTP_OK; + } + } + return MHD_HTTP_FORBIDDEN; +} + +static inline int +errno_to_http_code(int ec) +{ + return ec == ENOENT ? MHD_HTTP_NOT_FOUND : MHD_HTTP_FORBIDDEN; +} + int -get_file_name(char const *host, char const *url, char **fname) +get_file_name(char const *host, char const *url, char **fname, struct stat *stp) { struct urimap const *map; char *file_name; - char const *basename; - size_t len; char *cf; - + struct stat st; + DIRCONFIG *conf; + map = urimap_find(host, url); if (!map) return MHD_HTTP_NOT_FOUND; - basename = url + map->uri_len; - if (basename[0] == 0 || (basename[0] == '/' && basename[1] == 0)) { - if (index_file) - basename = index_file; - else + file_name = catfile_n(map->dir, map->dir_len, url + map->uri_len); + if (lstat(file_name, &st)) + return errno_to_http_code(errno); + + conf = dirconfig(file_name, map->dir_len); + + if (S_ISDIR(st.st_mode)) { + int res = find_index_file(file_name, conf, &cf, &st); + if (res == MHD_HTTP_OK) { + free(file_name); + file_name = cf; + } else if (conf->listing) { + //FIXME return MHD_HTTP_FORBIDDEN; + } else + return res; } - len = map->dir_len + strlen(basename); - file_name = xmalloc(len + 1); - if (map->dir_len) - memcpy(file_name, map->dir, map->dir_len); - strcpy(file_name + map->dir_len, basename); - - cf = cfname(file_name); - if (!cf) { - free(cf); - return MHD_HTTP_NOT_FOUND; - } else if (strcmp(cf, file_name) == 0) { - free(cf); - } else if (urimap_find_dir(host, cf)) { - free(file_name); - file_name = cf; - } else { - free(file_name); - free(cf); - return MHD_HTTP_NOT_FOUND; - } + + if (S_ISLNK(st.st_mode)) { + if (!conf->follow) + return MHD_HTTP_FORBIDDEN; + cf = cfname(file_name); + if (!cf) { + free(file_name); + return MHD_HTTP_NOT_FOUND; + } else if (urimap_find_dir(host, cf)) { + free(file_name); + file_name = cf; + } else { + free(file_name); + free(cf); + return MHD_HTTP_NOT_FOUND; + } + } else if (!S_ISREG(st.st_mode)) + return 
MHD_HTTP_FORBIDDEN; + *fname = file_name; + *stp = st; + return MHD_HTTP_OK; } @@ -681,12 +731,6 @@ http_error(struct MHD_Connection *connection, return ret; } -static inline int -errno_to_http_code(void) -{ - return errno == ENOENT ? MHD_HTTP_NOT_FOUND : MHD_HTTP_FORBIDDEN; -} - static int fileserv_handler(void *cls, struct MHD_Connection *conn, @@ -717,7 +761,7 @@ fileserv_handler(void *cls, } *con_cls = NULL; - status = get_file_name(host, url, &file_name); + status = get_file_name(host, url, &file_name, &st); if (status != MHD_HTTP_OK) return http_error(conn, method, url, status, NULL); @@ -725,27 +769,13 @@ fileserv_handler(void *cls, if (fd == -1) { free(file_name); return http_error(conn, method, url, - errno_to_http_code(), + errno_to_http_code(errno), NULL); } type = get_file_type(file_name); free(file_name); - if (fstat(fd, &st)) { - close(fd); - return http_error(conn, method, url, - errno_to_http_code(), - NULL); - } - - if (!S_ISREG(st.st_mode)) { - close(fd); - return http_error(conn, method, url, - MHD_HTTP_FORBIDDEN, - NULL); - } - response = MHD_create_response_from_fd64(st.st_size, fd); if (!response) { close(fd); @@ -789,7 +819,7 @@ main(int argc, char **argv) 0 }; char *mime_types_file = NULL; - + p = strrchr(argv[0], '/'); if (p) progname = p + 1; @@ -812,11 +842,6 @@ main(int argc, char **argv) case 'g': group = optarg; break; - case 'i': - index_file = xmalloc (strlen (optarg) + 2); - index_file[0] = '/'; - strcpy (index_file + 1, optarg); - break; case 'h': usage(); exit(0); diff --git a/src/fileserv.h b/src/fileserv.h index 924a2f8..9502144 100644 --- a/src/fileserv.h +++ b/src/fileserv.h @@ -36,6 +36,7 @@ void xmalloc_fail(void); void *xmalloc(size_t s); void *xcalloc(size_t nmemb, size_t size); void *xrealloc(void *ptr, size_t size); +char *xstrdup(char const *str); void runas(char const *runas_user, char const *runas_group); @@ -48,3 +49,21 @@ int fileserv_acl(void *cls, const struct sockaddr *addr, socklen_t addrlen); void 
pidfile_remove(void); void pidfile_create(void); void pidfile_check(void); + +typedef struct fileserv_dirconfig { + int follow:1; /* Follow symbolic links */ + int listing:1; /* Show directory listing, if a directory + is requested and no index file exists */ + int list_unreadable:1; /* List unreadable files */ + char **index_files; /* Names of index files */ +} DIRCONFIG; + +#define DIRCONFIG_INITIALIZER { 0, 0, 1, NULL } + +DIRCONFIG *dirconfig(char const *path, size_t prefix_len); +void dirconfig_parse(const char *file, DIRCONFIG *conf); +DIRCONFIG *dirconfig_init(void); +void dirconfig_free(DIRCONFIG *conf); + +char *catfile_n(char const *dir, size_t len, char const *file); +char *catfile(char const *dir, char const *file); diff --git a/src/wordsplit.c b/src/wordsplit.c new file mode 100644 index 0000000..4884a22 --- /dev/null +++ b/src/wordsplit.c @@ -0,0 +1,2383 @@ +/* wordsplit - a word splitter + Copyright (C) 2009-2016 Sergey Poznyakoff + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3 of the License, or (at your + option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <errno.h> +#include <ctype.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <stdarg.h> +#include <pwd.h> +#include <glob.h> + +#if ENABLE_NLS +# include <gettext.h> +#else +# define gettext(msgid) msgid +#endif +#define _(msgid) gettext (msgid) +#define N_(msgid) msgid + +#include <wordsplit.h> + +#define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n') +#define ISDELIM(ws,c) \ + (strchr ((ws)->ws_delim, (c)) != NULL) +#define ISPUNCT(c) (strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL) +#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z') +#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z') +#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c)) +#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9') +#define ISXDIGIT(c) (strchr("abcdefABCDEF", c)!=NULL) +#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c)) +#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127) + +#define ISVARBEG(c) (ISALPHA(c) || c == '_') +#define ISVARCHR(c) (ISALNUM(c) || c == '_') + +#define ALLOC_INIT 128 +#define ALLOC_INCR 128 + +static void +_wsplt_alloc_die (struct wordsplit *wsp) +{ + wsp->ws_error ("%s", _("memory exhausted")); + abort (); +} + +static void +_wsplt_error (const char *fmt, ...) 
+{ + va_list ap; + + va_start (ap, fmt); + vfprintf (stderr, fmt, ap); + va_end (ap); + fputc ('\n', stderr); +} + +static void wordsplit_free_nodes (struct wordsplit *); + +static int +_wsplt_seterr (struct wordsplit *wsp, int ec) +{ + wsp->ws_errno = ec; + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); + return ec; +} + +static int +_wsplt_nomem (struct wordsplit *wsp) +{ + errno = ENOMEM; + wsp->ws_errno = WRDSE_NOSPACE; + if (wsp->ws_flags & WRDSF_ENOMEMABRT) + wsp->ws_alloc_die (wsp); + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); + if (!(wsp->ws_flags & WRDSF_REUSE)) + wordsplit_free (wsp); + wordsplit_free_nodes (wsp); + return wsp->ws_errno; +} + +static int wordsplit_run (const char *command, size_t length, + struct wordsplit *wsp, + int flags, int lvl); + +static int +_wsplt_subsplit (struct wordsplit *wsp, struct wordsplit *wss, + char const *str, int len, + int flags) +{ + wss->ws_delim = wsp->ws_delim; + wss->ws_debug = wsp->ws_debug; + wss->ws_error = wsp->ws_error; + wss->ws_alloc_die = wsp->ws_alloc_die; + + if (!(flags & WRDSF_NOVAR)) + { + wss->ws_env = wsp->ws_env; + wss->ws_getvar = wsp->ws_getvar; + flags |= wsp->ws_flags & (WRDSF_ENV | WRDSF_ENV_KV | WRDSF_GETVAR); + } + if (!(flags & WRDSF_NOCMD)) + { + wss->ws_command = wsp->ws_command; + } + + if ((flags & (WRDSF_NOVAR|WRDSF_NOCMD)) != (WRDSF_NOVAR|WRDSF_NOCMD)) + { + wss->ws_closure = wsp->ws_closure; + flags |= wsp->ws_flags & WRDSF_CLOSURE; + } + + wss->ws_options = wsp->ws_options; + + flags |= WRDSF_DELIM + | WRDSF_ALLOC_DIE + | WRDSF_ERROR + | WRDSF_DEBUG + | (wsp->ws_flags & (WRDSF_SHOWDBG | WRDSF_SHOWERR | WRDSF_OPTIONS)); + + return wordsplit_run (str, len, wss, flags, wsp->ws_lvl + 1); +} + +static void +_wsplt_seterr_sub (struct wordsplit *wsp, struct wordsplit *wss) +{ + if (wsp->ws_errno == WRDSE_USERERR) + free (wsp->ws_usererr); + wsp->ws_errno = wss->ws_errno; + if (wss->ws_errno == WRDSE_USERERR) + { + wsp->ws_usererr = wss->ws_usererr; + 
wss->ws_errno = WRDSE_EOF; + wss->ws_usererr = NULL; + } +} + +static void +wordsplit_init0 (struct wordsplit *wsp) +{ + if (wsp->ws_flags & WRDSF_REUSE) + { + if (!(wsp->ws_flags & WRDSF_APPEND)) + wordsplit_free_words (wsp); + wordsplit_clearerr (wsp); + } + else + { + wsp->ws_wordv = NULL; + wsp->ws_wordc = 0; + wsp->ws_wordn = 0; + } + + wsp->ws_errno = 0; + wsp->ws_head = wsp->ws_tail = NULL; +} + +char wordsplit_c_escape_tab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v"; + +static int +wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, + int flags) +{ + wsp->ws_flags = flags; + + if (!(wsp->ws_flags & WRDSF_ALLOC_DIE)) + wsp->ws_alloc_die = _wsplt_alloc_die; + if (!(wsp->ws_flags & WRDSF_ERROR)) + wsp->ws_error = _wsplt_error; + + if (!(wsp->ws_flags & WRDSF_NOVAR)) + { + /* These will be initialized on first variable assignment */ + wsp->ws_envidx = wsp->ws_envsiz = 0; + wsp->ws_envbuf = NULL; + } + + if (!(wsp->ws_flags & WRDSF_NOCMD)) + { + if (!wsp->ws_command) + { + _wsplt_seterr (wsp, WRDSE_USAGE); + errno = EINVAL; + return wsp->ws_errno; + } + } + + if (wsp->ws_flags & WRDSF_SHOWDBG) + { + if (!(wsp->ws_flags & WRDSF_DEBUG)) + { + if (wsp->ws_flags & WRDSF_ERROR) + wsp->ws_debug = wsp->ws_error; + else if (wsp->ws_flags & WRDSF_SHOWERR) + wsp->ws_debug = _wsplt_error; + else + wsp->ws_flags &= ~WRDSF_SHOWDBG; + } + } + + wsp->ws_input = input; + wsp->ws_len = len; + + if (!(wsp->ws_flags & WRDSF_DOOFFS)) + wsp->ws_offs = 0; + + if (!(wsp->ws_flags & WRDSF_DELIM)) + wsp->ws_delim = " \t\n"; + + if (!(wsp->ws_flags & WRDSF_COMMENT)) + wsp->ws_comment = NULL; + + if (!(wsp->ws_flags & WRDSF_CLOSURE)) + wsp->ws_closure = NULL; + + if (!(wsp->ws_flags & WRDSF_OPTIONS)) + wsp->ws_options = 0; + + if (wsp->ws_flags & WRDSF_ESCAPE) + { + if (!wsp->ws_escape[WRDSX_WORD]) + wsp->ws_escape[WRDSX_WORD] = ""; + if (!wsp->ws_escape[WRDSX_QUOTE]) + wsp->ws_escape[WRDSX_QUOTE] = ""; + } + else + { + if (wsp->ws_flags & WRDSF_CESCAPES) + { + 
wsp->ws_escape[WRDSX_WORD] = wordsplit_c_escape_tab; + wsp->ws_escape[WRDSX_QUOTE] = wordsplit_c_escape_tab; + wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD + | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD; + } + else + { + wsp->ws_escape[WRDSX_WORD] = ""; + wsp->ws_escape[WRDSX_QUOTE] = "\\\\\"\""; + wsp->ws_options |= WRDSO_BSKEEP_QUOTE; + } + } + + wsp->ws_endp = 0; + + wordsplit_init0 (wsp); + + return 0; +} + +static int +alloc_space (struct wordsplit *wsp, size_t count) +{ + size_t offs = (wsp->ws_flags & WRDSF_DOOFFS) ? wsp->ws_offs : 0; + char **ptr; + size_t newalloc; + + if (wsp->ws_wordv == NULL) + { + newalloc = offs + count > ALLOC_INIT ? count : ALLOC_INIT; + ptr = calloc (newalloc, sizeof (ptr[0])); + } + else if (wsp->ws_wordn < offs + wsp->ws_wordc + count) + { + newalloc = offs + wsp->ws_wordc + + (count > ALLOC_INCR ? count : ALLOC_INCR); + ptr = realloc (wsp->ws_wordv, newalloc * sizeof (ptr[0])); + } + else + return 0; + + if (ptr) + { + wsp->ws_wordn = newalloc; + wsp->ws_wordv = ptr; + } + else + return _wsplt_nomem (wsp); + return 0; +} + + +/* Node state flags */ +#define _WSNF_NULL 0x01 /* null node (a noop) */ +#define _WSNF_WORD 0x02 /* node contains word in v.word */ +#define _WSNF_QUOTE 0x04 /* text is quoted */ +#define _WSNF_NOEXPAND 0x08 /* text is not subject to expansion */ +#define _WSNF_JOIN 0x10 /* node must be joined with the next node */ +#define _WSNF_SEXP 0x20 /* is a sed expression */ +#define _WSNF_DELIM 0x40 /* node is a delimiter */ + +#define _WSNF_EMPTYOK 0x0100 /* special flag indicating that + wordsplit_add_segm must add the + segment even if it is empty */ + +struct wordsplit_node +{ + struct wordsplit_node *prev; /* Previous element */ + struct wordsplit_node *next; /* Next element */ + int flags; /* Node flags */ + union + { + struct + { + size_t beg; /* Start of word in ws_input */ + size_t end; /* End of word in ws_input */ + } segm; + char *word; + } v; +}; + +static const char * +wsnode_flagstr (int flags) +{ + 
static char retbuf[7]; + char *p = retbuf; + + if (flags & _WSNF_WORD) + *p++ = 'w'; + else if (flags & _WSNF_NULL) + *p++ = 'n'; + else + *p++ = '-'; + if (flags & _WSNF_QUOTE) + *p++ = 'q'; + else + *p++ = '-'; + if (flags & _WSNF_NOEXPAND) + *p++ = 'E'; + else + *p++ = '-'; + if (flags & _WSNF_JOIN) + *p++ = 'j'; + else + *p++ = '-'; + if (flags & _WSNF_SEXP) + *p++ = 's'; + else + *p++ = '-'; + if (flags & _WSNF_DELIM) + *p++ = 'd'; + else + *p++ = '-'; + *p = 0; + return retbuf; +} + +static const char * +wsnode_ptr (struct wordsplit *wsp, struct wordsplit_node *p) +{ + if (p->flags & _WSNF_NULL) + return ""; + else if (p->flags & _WSNF_WORD) + return p->v.word; + else + return wsp->ws_input + p->v.segm.beg; +} + +static size_t +wsnode_len (struct wordsplit_node *p) +{ + if (p->flags & _WSNF_NULL) + return 0; + else if (p->flags & _WSNF_WORD) + return strlen (p->v.word); + else + return p->v.segm.end - p->v.segm.beg; +} + +static int +wsnode_new (struct wordsplit *wsp, struct wordsplit_node **pnode) +{ + struct wordsplit_node *node = calloc (1, sizeof (*node)); + if (!node) + return _wsplt_nomem (wsp); + *pnode = node; + return 0; +} + +static void +wsnode_free (struct wordsplit_node *p) +{ + if (p->flags & _WSNF_WORD) + free (p->v.word); + free (p); +} + +static void +wsnode_append (struct wordsplit *wsp, struct wordsplit_node *node) +{ + node->next = NULL; + node->prev = wsp->ws_tail; + if (wsp->ws_tail) + wsp->ws_tail->next = node; + else + wsp->ws_head = node; + wsp->ws_tail = node; +} + +static void +wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node) +{ + struct wordsplit_node *p; + + p = node->prev; + if (p) + { + p->next = node->next; + if (!node->next) + p->flags &= ~_WSNF_JOIN; + } + else + wsp->ws_head = node->next; + + p = node->next; + if (p) + p->prev = node->prev; + else + wsp->ws_tail = node->prev; + + node->next = node->prev = NULL; +} + +static void +wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node, + struct 
wordsplit_node *anchor, int before) +{ + if (!wsp->ws_head) + { + node->next = node->prev = NULL; + wsp->ws_head = wsp->ws_tail = node; + } + else if (before) + { + if (anchor->prev) + wsnode_insert (wsp, node, anchor->prev, 0); + else + { + node->prev = NULL; + node->next = anchor; + anchor->prev = node; + wsp->ws_head = node; + } + } + else + { + struct wordsplit_node *p; + + p = anchor->next; + if (p) + p->prev = node; + else + wsp->ws_tail = node; + node->next = p; + node->prev = anchor; + anchor->next = node; + } +} + +static int +wordsplit_add_segm (struct wordsplit *wsp, size_t beg, size_t end, int flg) +{ + struct wordsplit_node *node; + int rc; + + if (end == beg && !(flg & _WSNF_EMPTYOK)) + return 0; + rc = wsnode_new (wsp, &node); + if (rc) + return rc; + node->flags = flg & ~(_WSNF_WORD | _WSNF_EMPTYOK); + node->v.segm.beg = beg; + node->v.segm.end = end; + wsnode_append (wsp, node); + return 0; +} + +static void +wordsplit_free_nodes (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + + for (p = wsp->ws_head; p;) + { + struct wordsplit_node *next = p->next; + wsnode_free (p); + p = next; + } + wsp->ws_head = wsp->ws_tail = NULL; +} + +static void +wordsplit_dump_nodes (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + int n = 0; + + for (p = wsp->ws_head, n = 0; p; p = p->next, n++) + { + if (p->flags & _WSNF_WORD) + wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%s;", + wsp->ws_lvl, + n, p, p->flags, wsnode_flagstr (p->flags), p->v.word); + else + wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%.*s;", + wsp->ws_lvl, + n, p, p->flags, wsnode_flagstr (p->flags), + (int) (p->v.segm.end - p->v.segm.beg), + wsp->ws_input + p->v.segm.beg); + } +} + +static int +coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node) +{ + struct wordsplit_node *p, *end; + size_t len = 0; + char *buf, *cur; + int stop; + + for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next) + { + len += wsnode_len (p); + } + if (p) + len += wsnode_len (p); + end = p; + 
+ buf = malloc (len + 1); + if (!buf) + return _wsplt_nomem (wsp); + cur = buf; + + p = node; + for (stop = 0; !stop;) + { + struct wordsplit_node *next = p->next; + const char *str = wsnode_ptr (wsp, p); + size_t slen = wsnode_len (p); + + memcpy (cur, str, slen); + cur += slen; + if (p != node) + { + node->flags |= p->flags & _WSNF_QUOTE; + wsnode_remove (wsp, p); + stop = p == end; + wsnode_free (p); + } + p = next; + } + + *cur = 0; + + node->flags &= ~_WSNF_JOIN; + + if (node->flags & _WSNF_WORD) + free (node->v.word); + else + node->flags |= _WSNF_WORD; + node->v.word = buf; + return 0; +} + +static void wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote, + char *dst, const char *src, + size_t n); + +static int +wsnode_quoteremoval (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + + for (p = wsp->ws_head; p; p = p->next) + { + const char *str = wsnode_ptr (wsp, p); + size_t slen = wsnode_len (p); + int unquote; + + if (wsp->ws_flags & WRDSF_QUOTE) + { + unquote = !(p->flags & _WSNF_NOEXPAND); + } + else + unquote = 0; + + if (unquote) + { + if (!(p->flags & _WSNF_WORD)) + { + char *newstr = malloc (slen + 1); + if (!newstr) + return _wsplt_nomem (wsp); + memcpy (newstr, str, slen); + newstr[slen] = 0; + p->v.word = newstr; + p->flags |= _WSNF_WORD; + } + + wordsplit_string_unquote_cop |