diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2020-11-01 11:28:22 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2020-11-01 13:23:05 +0200 |
commit | 424b234cf37355421cdd58e144e64c3a1e7cc249 (patch) | |
tree | 41271cf144a86655df439bd2c5675596c89fbacc /src | |
parent | 5c9b1f86bda16d774898a6c82a357adf51a9a79b (diff) | |
download | mailfromd-424b234cf37355421cdd58e144e64c3a1e7cc249.tar.gz mailfromd-424b234cf37355421cdd58e144e64c3a1e7cc249.tar.bz2 |
MFL: New built-in function "sed".
The "sed" function (actually, a statement) transforms its first argument
using the sed expressions given in its subsequent arguments and returns the
result:
set res sed(input, 's/foo/bar/g', 's/bar/Baz/')
* lib/libmf.h (transform_append_t)
(transform_reduce_t): Remove typedefs.
(transform_string): Change signature.
* lib/transform.c (parse_transform_expr): Fix parsing of the
regexp part.
(transform_string): Rewrite. No additional callbacks needed.
* src/drivers.c: New node types: 'sedcomp', to compile the sed
transformation expression, and 'sed', to apply it.
* src/gram.y: New production for sed(arg, expr).
* src/lex.l (sed): New keyword.
* src/mailfromd.h (sedcomp_node, sed_node): New structs.
(node): Add new sedcomp_node and sed_node parts to the union.
* src/opcodes (SEDCOMP, SED): New opcodes.
* src/prog.c (next_transform_index)
(install_transform,get_transform): New functions.
(instr_sedcomp, instr_sed): New instruction handlers.
* src/symbols.c (import_rules_eval): Simplify transform expression
handling.
* tests/sed.at: New file.
* tests/Makefile.am: Add sed.at.
* tests/testsuite.at: Include sed.at.
Diffstat (limited to 'src')
-rw-r--r-- | src/drivers.c | 101 | ||||
-rw-r--r-- | src/gram.y | 74 | ||||
-rw-r--r-- | src/lex.l | 1 | ||||
-rw-r--r-- | src/mailfromd.h | 18 | ||||
-rw-r--r-- | src/opcodes | 3 | ||||
-rw-r--r-- | src/prog.c | 84 | ||||
-rw-r--r-- | src/symbols.c | 27 |
7 files changed, 258 insertions, 50 deletions
diff --git a/src/drivers.c b/src/drivers.c index 454afce4..e552ea5d 100644 --- a/src/drivers.c +++ b/src/drivers.c @@ -2772,5 +2772,106 @@ code_type_offset(NODE *node, struct mu_locus_range const **old_locus) code_immediate(node->v.var_ref.variable->ord, size); } } + +/* type sedcomp */ +void +mark_type_sedcomp(NODE *node) +{ + mark(node->v.sedcomp.expr); +} + +void +print_type_sedcomp(NODE *node, int level) +{ + print_level(level); + printf("SEDCOMP %zu %d\n", node->v.sedcomp.index, node->v.sedcomp.flags); + print_node(node->v.sedcomp.expr, level + 1); +} + +void +optimize_type_sedcomp(NODE *node) +{ + NODE *arg = node->v.sedcomp.expr; + optimize(arg); + if (arg->type == node_type_string) { + transform_t t = transform_compile(arg->v.literal->text, + node->v.sedcomp.flags); + if (!t) + parse_error_locus(&arg->locus, + _("invalid transform string: %s"), + transform_error_string()); + install_transform(node->v.sedcomp.index, t); + node->type = node_type_number; + node->v.number = node->v.sedcomp.index; + free_node(arg); + } +} + +void +code_type_sedcomp(NODE *node, struct mu_locus_range const **old_locus) +{ + code_node(node->v.sedcomp.expr); + mark_locus(node, old_locus); + code_op(opcode_sedcomp); + code_immediate(node->v.sedcomp.index, size); + code_immediate(node->v.sedcomp.flags, int); +} + +/* type sed */ +void +mark_type_sed(NODE *node) +{ + mark(node->v.sed.arg); + mark(node->v.sed.comp); +} + +void +print_type_sed(NODE *node, int level) +{ + print_level(level); + printf("SED\n"); + print_node(node->v.sed.arg, level + 1); + print_node(node->v.sed.comp, level + 1); +} + +void +optimize_type_sed(NODE *node) +{ + optimize(node->v.sed.arg); + optimize(node->v.sed.comp); + if (node->v.sed.comp->type == node_type_number && + node->v.sed.arg->type == node_type_string) { + transform_t t = get_transform(node->v.sed.comp->v.number); + char *res = transform_string(t, node->v.sed.arg->v.literal->text); + free_subtree(node->v.sed.arg); + free_subtree(node->v.sed.comp); + 
node->type = node_type_string; + string_begin(); + string_add(res, strlen(res)); + node->v.literal = string_finish(); + free(res); + } else if (node->v.sed.comp->type == node_type_number && + node->v.sed.arg->type == node_type_sed && + node->v.sed.arg->v.sed.comp->type == node_type_number) { + NODE *arg = node->v.sed.arg; + + transform_join(get_transform(arg->v.sed.comp->v.number), + get_transform(node->v.sed.comp->v.number)); + /* FIXME: transform expression at index + node->v.sed.comp->v.number remains unused */ + node->v.sed.comp->v.number = arg->v.sed.comp->v.number; + free_subtree(arg->v.sed.comp); + node->v.sed.arg = arg->v.sed.arg; + } +} + +void +code_type_sed(NODE *node, struct mu_locus_range const **old_locus) +{ + code_node(node->v.sed.arg); + code_node(node->v.sed.comp); + mark_locus(node, old_locus); + code_op(opcode_sed); +} @@ -419,6 +419,8 @@ _create_alias(void *item, void *data) T_DCLEX "dclex" T_SHL "<<" T_SHR ">>" + T_SED "sed" + %token <node> T_COMPOSE "composed string" %token T_MODBEG T_MODEND %token <literal> T_STRING "string" @@ -1221,7 +1223,7 @@ action : sendmail_action | T_ECHO expr { $$ = alloc_node(node_type_echo, &@1); - $$->v.node = cast_to(dtype_string, $2); + $$->v.node = cast_to(dtype_string, $2); } ; @@ -2039,6 +2041,46 @@ funcall : T_BUILTIN '(' arglist ')' $$->v.builtin.args = NULL; } } + | T_SED '(' arglist ')' + { + NODE *arg, *expr; + + if ($3.count < 2) { + parse_error_locus(&@1, + _("too few arguments in call to `%s'"), + "sed"); + YYERROR; + } + + arg = cast_to(dtype_string, $3.head); + expr = $3.head->next; + /* Break the link between the args lest any eventual + optimizations cause grief later in mark phase. 
*/ + $3.head->next = NULL; + + do { + NODE *comp, *next; + struct mu_locus_range lr; + + comp = alloc_node(node_type_sedcomp, &expr->locus); + comp->v.sedcomp.index = next_transform_index(); + comp->v.sedcomp.expr = cast_to(dtype_string, expr); + comp->v.sedcomp.flags = regex_flags; + + lr.beg = @1.beg; + lr.end = expr->locus.end; + + $$ = alloc_node(node_type_sed, &lr); + $$->v.sed.comp = comp; + $$->v.sed.arg = arg; + + next = expr->next; + expr->next = NULL; + expr = next; + + arg = $$; + } while (expr); + } | T_FUNCTION '(' arglist ')' { if (check_func_usage($1, &@1)) @@ -3647,35 +3689,32 @@ node_type(NODE *node) switch (node->type) { case node_type_string: case node_type_symbol: + case node_type_sed: + case node_type_concat: + case node_type_argx: + case node_type_backref: return dtype_string; case node_type_number: + case node_type_bin: + case node_type_un: + case node_type_sedcomp: + case node_type_offset: + case node_type_vaptr: return dtype_number; case node_type_if: return dtype_unspecified; - case node_type_bin: - return dtype_number; - - case node_type_un: - return dtype_number; - case node_type_builtin: return node->v.builtin.builtin->rettype; - case node_type_concat: - return dtype_string; - case node_type_variable: return node->v.var_ref.variable->type; case node_type_arg: return node->v.arg.data_type; - case node_type_argx: - return dtype_string; - case node_type_call: return node->v.call.func->rettype; @@ -3684,18 +3723,9 @@ node_type(NODE *node) return node_type(node->v.node); break; - case node_type_backref: - return dtype_string; - case node_type_cast: return node->v.cast.data_type; - case node_type_offset: - return dtype_number; - - case node_type_vaptr: - return dtype_number; - case node_type_result: case node_type_header: case node_type_asgn: @@ -723,6 +723,7 @@ dclex return T_DCLEX; 0{O}{O}* { yylval.number = strtoul(yytext, NULL, 8); return T_NUMBER; }; 0|{P} { yylval.number = strtoul(yytext, NULL, 10); return T_NUMBER; }; /* Identifiers */ 
+"sed" return T_SED; {IDENT} { const struct constant *cptr; diff --git a/src/mailfromd.h b/src/mailfromd.h index 362bfeb0..f35ef990 100644 --- a/src/mailfromd.h +++ b/src/mailfromd.h @@ -353,6 +353,18 @@ struct argx_node { int nargs; NODE *node; }; + +struct sedcomp_node { + size_t index; + NODE *expr; /* Substitution expression */ + int flags; +}; + +struct sed_node { + NODE *comp; + NODE *arg; +}; + #include "node-type.h" /* Parse tree node */ @@ -387,6 +399,8 @@ struct node { struct sym_regex *regex; struct loop_node loop; struct argx_node argx; + struct sedcomp_node sedcomp; + struct sed_node sed; } v; }; @@ -593,6 +607,10 @@ struct rt_regex { void register_regex(struct sym_regex *rp); void finalize_regex(void); +size_t next_transform_index(void); +void install_transform(size_t index, transform_t tp); +transform_t get_transform(size_t index); + struct pragma { SYMENT_STRUCT(name); diff --git a/src/opcodes b/src/opcodes index 77c8f16d..5cc373e7 100644 --- a/src/opcodes +++ b/src/opcodes @@ -96,3 +96,6 @@ FUNCALL dump_funcall 2 NEXT NULL 0 RESULT dump_result 1 HEADER dump_header 2 + +SEDCOMP dump_sedcomp 2 +SED NULL 0 @@ -60,7 +60,6 @@ size_t dataseg_reloc_count; struct rt_regex *regtab; size_t regcount; mu_opool_t regpool; - void code_init() @@ -149,7 +148,34 @@ code_peek(prog_counter_t pos) assert(pos < pmax); return prog[pos]; } + +static size_t transform_count; +static size_t transform_max; +static transform_t *transform_tab; + +size_t +next_transform_index(void) +{ + if (transform_count == transform_max) { + transform_tab = mu_2nrealloc(transform_tab, + &transform_max, + sizeof(transform_tab[0])); + } + transform_tab[transform_count] = NULL; + return transform_count++; +} + +void +install_transform(size_t index, transform_t tp) +{ + transform_tab[index] = tp; +} +transform_t +get_transform(size_t index) +{ + return transform_tab[index]; +} /* Regexps*/ @@ -213,7 +239,7 @@ disable_prog_trace(const char *modlist) } /* 
======================================================================== - Drzewa w górê, rzeki w dó³. + Drzewa w górę, rzeki w dół. Jacek Kaczmarski. "Upadek Ikara" @@ -1588,8 +1614,62 @@ dump_regcomp(prog_counter_t i) printf("%s", regex_flags_to_string(rtx->regflags, buffer, sizeof buffer)); } + +void +instr_sedcomp(eval_environ_t env) +{ + char * MFL_DATASEG str; + size_t index = mf_c_val(get_immediate(env, 0), size); + transform_t *tp = &transform_tab[index]; + int flags = mf_c_val(get_immediate(env, 1), int); + + get_string_arg(env, 0, &str); + if (PROG_TRACE_ENGINE) + prog_trace(env, "SUBCOMP %s %zu %d", str, index, flags); + + advance_pc(env, 2); + adjust_stack(env, 1); + + transform_free(*tp); + + *tp = transform_compile(str, flags); + if (*tp == NULL) + runtime_error(env, + _("invalid transform string \"%s\": %s"), + str, + transform_error_string()); + + push(env, (STKVAL) index); +} + +void +dump_sedcomp(prog_counter_t i) +{ + size_t index = mf_cell_c_value(prog[i], size); + int flags = mf_cell_c_value(prog[i+1], int); + printf("%zu %d", index, flags); +} void +instr_sed(eval_environ_t env) +{ + char * MFL_DATASEG arg; + long i = mf_c_val(get_arg(env, 0), long); + char *res; + + get_string_arg(env, 1, &arg); + + if (PROG_TRACE_ENGINE) + prog_trace(env, "SED %s %ld", arg, i); + + adjust_stack(env, 2); + + res = transform_string(transform_tab[i], arg); + pushs(env, res); + free(res); +} + +void instr_fnmatch(eval_environ_t env) { char *string, *pattern; diff --git a/src/symbols.c b/src/symbols.c index 4881a023..b5aa4260 100644 --- a/src/symbols.c +++ b/src/symbols.c @@ -192,21 +192,6 @@ import_rules_free(struct import_rule *rules) } int -_append_pool(void *ptr, const char *s, size_t len) -{ - mu_opool_t op = ptr; - mu_opool_append(op, s, len); - return 0; -} - -char * -_reduce_pool(void *ptr) -{ - mu_opool_t op = ptr; - return mu_opool_finish(op, NULL); -} - -int import_rules_eval(struct import_rule *rule, const char *name, char **newname) { @@ -236,17 
+221,7 @@ import_rules_eval(struct import_rule *rule, const char *name, if (rule->neg) res = !res; if (res && rule->type == import_transform) { - char *str; - mu_opool_t op; - - mu_opool_create(&op, MU_OPOOL_ENOMEMABRT); - str = transform_string(rule->xform, - name, - op, - _append_pool, - _reduce_pool); - mu_opool_destroy(&op); - *newname = str; + *newname = transform_string(rule->xform, name); } break; } |