diff options
Diffstat (limited to 'src/argot-lex.l')
-rw-r--r-- | src/argot-lex.l | 443 |
1 files changed, 443 insertions, 0 deletions
diff --git a/src/argot-lex.l b/src/argot-lex.l new file mode 100644 index 0000000..c0ffc7e --- /dev/null +++ b/src/argot-lex.l @@ -0,0 +1,443 @@ +/* argot - Gray's Extensible Configuration System -*- c -*- */ +%option nounput +%option noinput +%top { +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif +} +%{ +/* argot - Gray's Extensible Configuration System + Copyright (C) 2007-2016 Sergey Poznyakoff + + Grecs is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3 of the License, or (at your + option) any later version. + + Grecs is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with Grecs. If not, see <http://www.gnu.org/licenses/>. */ + +#include <argot.h> +#include <argot-gram.h> +#include <unistd.h> +#include <fcntl.h> +#include <ctype.h> +#include <stdlib.h> +#include <errno.h> + +#include <wordsplit.h> + +static char *multiline_delimiter; +static size_t multiline_delimiter_len; +static int multiline_unescape; /* Unescape here-document contents */ +static int (*char_to_strip)(char); /* Strip matching characters of each + here-document line */ + +struct argot_locus_point argot_current_locus_point; /* Input file location */ +/* Line correction. Equals to the number of #line directives inserted into + the input by the preprocessor instance. The external preprocessor, if + any, counts these as input lines and therefore the line numbers in *its* + #line directives are offset by the value of XLINES. + + Uff, running two preprocessors is confusing... +*/ +static size_t xlines; + +static void multiline_begin(char *); +static void multiline_add(char *); +static char *multiline_strip_tabs(char *text); +static int ident(void); +static int isemptystr(int off); +static void qstring_locus_fixup(void); + +#define qstring() \ + ((argot_parser_options & ARGOT_OPTION_QUOTED_STRING_CONCAT) \ + ? QSTRING : STRING) + +#undef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ + do { \ + if (argot_preprocessor) \ + result = fread(buf, 1, max_size, yyin); \ + else \ + result = argot_preproc_fill_buffer(buf, max_size); \ + } while(0) + +#define YY_USER_ACTION do { \ + if (YYSTATE == 0) { \ + yylloc.beg = argot_current_locus_point; \ + yylloc.beg.col++; \ + } \ + argot_current_locus_point.col += yyleng; \ + yylloc.end = argot_current_locus_point; \ + } while (0); + +%} + +%x COMMENT ML STR + +WS [ \t\f][ \t\f]* +ID [a-zA-Z_][a-zA-Z_0-9-]* +P [1-9][0-9]* + +%% + /* C-style comments */ +"/*" BEGIN(COMMENT); +<COMMENT>[^*\n]* /* eat anything that's not a '*' */ +<COMMENT>"*"+[^*/\n]* /* eat up '*'s not followed by '/'s */ +<COMMENT>\n argot_locus_point_advance_line(argot_current_locus_point); +<COMMENT>"*"+"/" BEGIN(INITIAL); + /* Line directive */ +^[ \t]*#[ \t]*{P}[ \t]+\".*\".*\n { argot_parse_line_directive_cpp(yytext, + &yylloc, + &argot_current_locus_point, + &xlines); } +^[ \t]*#[ \t]*line[ \t].*\n { argot_parse_line_directive(yytext, + &yylloc, + &argot_current_locus_point, + &xlines); } + /* End-of-line comments */ +#.*\n { argot_locus_point_advance_line(argot_current_locus_point); } +#.* /* end-of-file comment */; +"//".*\n { argot_locus_point_advance_line(argot_current_locus_point); } +"//".* /* end-of-file comment */; + /* Identifiers */ +<INITIAL>{ID} return ident(); + /* Strings */ +[a-zA-Z0-9_\.\*/:@\[\]-]([a-zA-Z0-9_\./:@\[\]-][a-zA-Z0-9_\.\*/:@\[\]-]*)? { + argot_line_begin(); + argot_line_add(yytext, yyleng); + yylval.string = argot_line_finish(); + return STRING; } + /* Quoted strings */ +\"[^\\"\n]*\" { argot_line_begin(); + argot_line_add(yytext + 1, yyleng - 2); + yylval.string = argot_line_finish(); + qstring_locus_fixup(); + return qstring(); } +\"[^\\"\n]*\\\n { BEGIN(STR); + argot_line_begin(); + argot_line_acc_grow_unescape_last(yytext + 1, + yyleng - 1, + &yylloc); + argot_locus_point_advance_line(argot_current_locus_point); } +\"[^\\"\n]*\\. { BEGIN(STR); + argot_line_begin(); + argot_line_acc_grow_unescape_last(yytext + 1, + yyleng - 1, + &yylloc); } +<STR>\"[^\\"\n]*\\\n { argot_line_acc_grow_unescape_last(yytext, yyleng, + &yylloc); + argot_locus_point_advance_line(argot_current_locus_point); } +<STR>[^\\"\n]*\\. { argot_line_acc_grow_unescape_last(yytext, yyleng, + &yylloc); } +<STR>[^\\"\n]*\" { BEGIN(INITIAL); + if (yyleng > 1) + argot_line_add(yytext, yyleng - 1); + yylval.string = argot_line_finish(); + qstring_locus_fixup(); + return qstring(); } + /* Multiline strings */ +"<<"(-" "?)?\\?{ID}[ \t]*#.*\n | +"<<"(-" "?)?\\?{ID}[ \t]*"//".*\n | +"<<"(-" "?)?\\?{ID}[ \t]*\n | +"<<"(-" "?)?\"{ID}\"[ \t]*#.*\n | +"<<"(-" "?)?\"{ID}\"[ \t]*"//".*\n | +"<<"(-" "?)?\"{ID}\"[ \t]*\n { + BEGIN(ML); + multiline_begin(yytext+2); } + /* Ignore m4 line statements */ +<ML>^"#line ".*\n { + argot_locus_point_advance_line(argot_current_locus_point); +} +<ML>.*\n { char *p = multiline_strip_tabs(yytext); + + if (!strncmp(p, multiline_delimiter, multiline_delimiter_len) + && isemptystr(p + multiline_delimiter_len - yytext)) { + argot_free(multiline_delimiter); + multiline_delimiter = NULL; + BEGIN(INITIAL); + yylval.string = argot_line_finish(); + + /* Update end pos */ + yylloc.end.line--; + for (yylloc.end.col = 0, + p = yylval.string + strlen(yylval.string) - 1; + p > yylval.string && p[-1] != '\n'; + yylloc.end.col++, p--); + if (yylloc.end.col == 0) + yylloc.end.col = 1; + return MSTRING; + } + argot_locus_point_advance_line(argot_current_locus_point); + multiline_add(p); } +{WS} ; + /* Other tokens */ +\n { argot_locus_point_advance_line(argot_current_locus_point); } +[,;{}()] return yytext[0]; +. { if (isascii(yytext[0]) && isprint(yytext[0])) + argot_error(&yylloc, 0, + _("stray character %c"), yytext[0]); + else + argot_error(&yylloc, 0, + _("stray character \\%03o"), + (unsigned char) yytext[0]); } +%% + +pid_t argot_preproc_pid; + +int +yywrap() +{ + if (argot_preprocessor) { + argot_preproc_extrn_shutdown(argot_preproc_pid); + fclose(yyin); + } else + argot_preproc_done(); + argot_current_locus_point.file = NULL; + return 1; +} + +int +argot_lex_begin(const char *name, int trace) +{ + yy_flex_debug = trace; + + argot_line_acc_create(); + + if (argot_preprocessor) { + int fd; + + fd = open(name, O_RDONLY); + if (fd == -1) { + argot_error(NULL, errno, _("Cannot open `%s'"), name); + return 1; + } + close(fd); + + yyin = argot_preproc_extrn_start(name, &argot_preproc_pid); + if (!yyin) { + argot_error(NULL, errno, + _("Unable to start external preprocessor `%s'"), + argot_preprocessor); + return 1; + } + } else + return argot_preproc_init(name); + + return 0; +} + +void +argot_lex_end(int err) +{ + argot_line_acc_free(); +} + +static int +isemptystr(int off) +{ + for (; yytext[off] && isspace(yytext[off]); off++) + ; + if (yytext[off] == ';') { + int i; + for (i = off + 1; yytext[i]; i++) + if (!isspace(yytext[i])) + return 0; + yyless(off); + return 1; + } + return yytext[off] == 0; +} + +char * +multiline_strip_tabs(char *text) +{ + if (char_to_strip) + for (; *text && char_to_strip(*text); text++) + ; + return text; +} + +static void +multiline_add(char *s) +{ + if (multiline_unescape) { + for (; *s; s++) { + if (*s == '\\') { + argot_line_acc_grow_char_unescape(s[1]); + ++s; + } else + argot_line_acc_grow_char(*s); + } + } else + argot_line_add(s, strlen(s)); +} + +static int +is_tab(char c) +{ + return c == '\t'; +} + +static int +is_ws(char c) +{ + return c == '\t' || c == ' '; +} + +void +multiline_begin(char *p) +{ + if (*p == '-') { + if (*++p == ' ') { + char_to_strip = is_ws; + p++; + } else + char_to_strip = is_tab; + } else + char_to_strip = NULL; + if (*p == '\\') { + p++; + multiline_unescape = 0; + multiline_delimiter_len = strcspn(p, " \t"); + } else if (*p == '"') { + char *q; + + p++; + multiline_unescape = 0; + q = strchr(p, '"'); + multiline_delimiter_len = q - p; + } else { + multiline_delimiter_len = strcspn(p, " \t"); + multiline_unescape = 1; + } + + /* Remove trailing newline */ + multiline_delimiter_len--; + multiline_delimiter = argot_malloc(multiline_delimiter_len + 1); + memcpy(multiline_delimiter, p, multiline_delimiter_len); + multiline_delimiter[multiline_delimiter_len] = 0; + argot_line_begin(); + + /* Update locus */ + argot_locus_point_advance_line(argot_current_locus_point); + yylloc.beg = argot_current_locus_point; + yylloc.beg.col++; +} + +static int +ident() +{ + char *p; + char *str; + size_t len; + + for (p = yytext; *p && isspace(*p); p++) + ; + + len = strlen(p); + str = argot_malloc(len + 1); + strcpy(str, p); + yylval.string = str; + return IDENT; +} + +static void +qstring_locus_fixup() +{ + if (argot_parser_options & ARGOT_OPTION_ADJUST_STRING_LOCATIONS) { + yylloc.beg.col++; + yylloc.end.col--; + } +} + +argot_value_t * +argot_value_ptr_from_static(argot_value_t *input) +{ + argot_value_t *ptr = argot_malloc(sizeof(*ptr)); + *ptr = *input; + return ptr; +} + + +static int +assign_locus(struct argot_locus_point *ploc, + char *name, char *line, size_t *pxlines) +{ + char *p; + + if (name) { + if (pxlines && (!ploc->file || strcmp(name, ploc->file))) + *pxlines = 0; + ploc->file = argot_install_text(name); + } + ploc->line = strtoul(line, &p, 10) - (pxlines ? *pxlines : 0); + ploc->col = 0; + return *p != 0; +} + +void +argot_parse_line_directive(char *text, argot_locus_t *ploc, + struct argot_locus_point *ppoint, size_t *pxlines) +{ + int rc = 1; + struct wordsplit ws; + + if (wordsplit(text, &ws, WRDSF_DEFFLAGS)) + argot_error(ploc, 0, _("cannot parse #line line: %s"), + wordsplit_strerror(&ws)); + else { + if (ws.ws_wordc == 2) + rc = assign_locus(ppoint, NULL, + ws.ws_wordv[1], pxlines); + else if (ws.ws_wordc == 3) + rc = assign_locus(ppoint, ws.ws_wordv[2], + ws.ws_wordv[1], pxlines); + else if (ws.ws_wordc == 4) { + rc = assign_locus(ppoint, ws.ws_wordv[2], + ws.ws_wordv[1], 0); + if (pxlines && rc == 0) { + char *p; + unsigned long x = strtoul(ws.ws_wordv[3], + &p, 10); + rc = *p != 0; + if (rc == 0) + *pxlines = x; + } + } else + argot_error(ploc, 0, _("invalid #line statement")); + + if (rc) + argot_error(ploc, 0, _("malformed #line statement")); + wordsplit_free(&ws); + } +} + +void +argot_parse_line_directive_cpp(char *text, argot_locus_t *ploc, + struct argot_locus_point *ppoint, + size_t *pxlines) +{ + struct wordsplit ws; + + if (wordsplit(text, &ws, WRDSF_DEFFLAGS)) { + argot_error(ploc, 0, _("cannot parse #line line: %s"), + wordsplit_strerror(&ws)); + return; + } else if (ws.ws_wordc < 3) + argot_error(ploc, 0, _("invalid #line statement")); + else { + if (assign_locus(ppoint, ws.ws_wordv[2], + ws.ws_wordv[1], pxlines)) + argot_error(ploc, 0, _("malformed #line statement")); + } + wordsplit_free(&ws); +} + |