diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2012-03-12 18:02:38 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2012-03-12 18:02:38 +0200 |
commit | 495b9d019b1c11574b967287cd40ec2cc84457c2 (patch) | |
tree | d4397cb8613e34fc5f766672ba165080457e365f | |
parent | 9eae40fa6a034b8675815098c60fbee12f17ad02 (diff) | |
download | swis-495b9d019b1c11574b967287cd40ec2cc84457c2.tar.gz swis-495b9d019b1c11574b967287cd40ec2cc84457c2.tar.bz2 |
Initial implementation of the search utility.
* include/swis/Makefile.am (pkginclude_HEADERS): Add query.h
* include/swis/backend.h (swis_backend_module) <swis_search>: New
member.
* include/swis/query.h: New file.
* src/.gitignore: Update.
* src/Makefile.am: Install auxiliary utils to pkglibexec.
Move dynload.c to libswisapp.a.
Build search utility.
* src/gram.y: New file.
* src/parse.l: New file.
* src/search-cfg.def: New file.
* src/search-cli.opt: New file.
* src/search.c: New file.
* src/search.h: New file.
* src/store-cli.opt: Rename the utility to 'store'.
* src/swis.h (swis_backend_load): New proto.
-rw-r--r-- | include/swis/Makefile.am | 2 | ||||
-rw-r--r-- | include/swis/backend.h | 2 | ||||
-rw-r--r-- | include/swis/query.h | 59 | ||||
-rw-r--r-- | src/.gitignore | 6 | ||||
-rw-r--r-- | src/Makefile.am | 27 | ||||
-rw-r--r-- | src/gram.y | 140 | ||||
-rw-r--r-- | src/parse.l | 81 | ||||
-rw-r--r-- | src/search-cfg.def | 21 | ||||
-rw-r--r-- | src/search-cli.opt | 103 | ||||
-rw-r--r-- | src/search.c | 146 | ||||
-rw-r--r-- | src/search.h | 20 | ||||
-rw-r--r-- | src/store-cli.opt | 2 | ||||
-rw-r--r-- | src/swis.h | 1 |
13 files changed, 601 insertions, 9 deletions
diff --git a/include/swis/Makefile.am b/include/swis/Makefile.am index 2c6511d..59c15de 100644 --- a/include/swis/Makefile.am +++ b/include/swis/Makefile.am @@ -14,4 +14,4 @@ # You should have received a copy of the GNU General Public License # along with SWIS. If not, see <http://www.gnu.org/licenses/>. -pkginclude_HEADERS = backend.h utf8.h
\ No newline at end of file +pkginclude_HEADERS = backend.h utf8.h query.h
\ No newline at end of file diff --git a/include/swis/backend.h b/include/swis/backend.h index 99d982c..fe513cd 100644 --- a/include/swis/backend.h +++ b/include/swis/backend.h @@ -28,6 +28,7 @@ typedef void *swis_backend_handle_t; struct grecs_node; +struct swis_node; struct swis_backend_module { @@ -40,6 +41,7 @@ struct swis_backend_module int (*swis_store_text) (swis_backend_handle_t bh, const char *text); int (*swis_store_word) (swis_backend_handle_t bh, char *word, unsigned long off, unsigned long n); + int (*swis_search) (swis_backend_handle_t bh, struct swis_node *query); }; #endif diff --git a/include/swis/query.h b/include/swis/query.h new file mode 100644 index 0000000..3f698d0 --- /dev/null +++ b/include/swis/query.h @@ -0,0 +1,59 @@ +/* This file is part of SWIS + Copyright (C) 2012 Sergey Poznyakoff + + SWIS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + SWIS is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with SWIS. If not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifndef __swis_query_h +#define __swis_query_h + +#include <stddef.h> + +enum swis_node_type + { + swis_node_op, + swis_node_word, + swis_node_seq + }; + +enum swis_op_code + { + swis_op_or, + swis_op_and, + swis_op_not + }; + +struct swis_operation +{ + enum swis_op_code opcode; + struct swis_node *arg[2]; +}; + +struct swis_sequence +{ + struct swis_node *this; + struct swis_node *next; +}; + +struct swis_node +{ + enum swis_node_type type; + union + { + char *word; + struct swis_operation op; + struct swis_sequence seq; + } v; +}; + +#endif diff --git a/src/.gitignore b/src/.gitignore index 7606ebe..25ba901 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -1,8 +1,12 @@ cfgtool html-strip.c html-strip -swis-store +store +search *-cli.h *-cfg.h swis word-split +gram.[ch] +gram.output +parse.c diff --git a/src/Makefile.am b/src/Makefile.am index 05f091c..1676e4c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -16,7 +16,7 @@ noinst_LIBRARIES=libswisapp.a -libexec_PROGRAMS=html-strip word-split cfgtool swis-store +pkglibexec_PROGRAMS=html-strip word-split cfgtool store search bin_SCRIPTS=swis BUILT_SOURCES= @@ -47,8 +47,7 @@ BUILT_SOURCES += word-split-cli.h word-split-cfg.h EXTRA_DIST += word-split-cli.opt word-split-cfg.def word_split_LDADD=$(LDADD) -swis_store_SOURCES=\ - dynload.c\ +store_SOURCES=\ store.c\ store-cli.h\ store-cfg.h @@ -56,16 +55,32 @@ swis_store_SOURCES=\ BUILT_SOURCES += store-cli.h store-cfg.h EXTRA_DIST += store-cli.opt store-cfg.def -swis_store_LDADD=$(LDADD) @LIBLTDL@ +store_LDADD=$(LDADD) @LIBLTDL@ + +search_SOURCES=\ + gram.y\ + parse.l\ + search.c\ + search.h\ + search-cli.h\ + search-cfg.h + +BUILT_SOURCES += search-cli.h search-cfg.h +EXTRA_DIST += search-cli.opt search-cfg.def + +search_LDADD=$(LDADD) @LIBLTDL@ + +AM_LFLAGS=-d +AM_YFLAGS=-dtv libswisapp_a_SOURCES=\ config.c\ + dynload.c\ readname.c\ utf8io.c INCLUDES=-I${top_srcdir}/include -I${top_srcdir}/gnu -I../gnu @GRECS_INCLUDES@ LDADD=./libswisapp.a 
../lib/libswis.la ../gnu/libgnu.a $(LIBICONV) -AM_LFLAGS=-d AM_CPPFLAGS= \ -DSYSCONFDIR=\"$(sysconfdir)\"\ @@ -77,7 +92,7 @@ AM_CPPFLAGS= \ swis: ${top_srcdir}/src/swis.in sed -e 's|This file is part of SWIS|&; Do not edit, it is generated automatically|'\ - -e 's|=SYSCONFDIR=|${sysconfdir}|g;s|=LIBEXECDIR=|${libexecdir}|g' \ + -e 's|=SYSCONFDIR=|${sysconfdir}|g;s|=LIBEXECDIR=|${pkglibexecdir}|g' \ ${top_srcdir}/src/swis.in > $@-t && \ cmp $@-t swis >/dev/null 2>&1 || mv $@-t swis && chmod +x swis diff --git a/src/gram.y b/src/gram.y new file mode 100644 index 0000000..f272acb --- /dev/null +++ b/src/gram.y @@ -0,0 +1,140 @@ +%{ +/* This file is part of SWIS + Copyright (C) 2007, 2012 Sergey Poznyakoff + + SWIS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + SWIS is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with SWIS. If not, see <http://www.gnu.org/licenses/>. 
*/ + +#include "swis.h" +#include "gram.h" +#include "swis/query.h" + +struct swis_node *query_tree; +struct swis_node *create_node (enum swis_node_type type); +%} +%token <word> T_WORD +%left T_OR +%left T_AND +%left T_NOT +%type <node> expr or_expr and_expr not_expr term word +%type <nodelist> sequence + +%union { + char *word; + struct swis_node *node; + struct nodelist + { + struct swis_node *head; + struct swis_node *tail; + } nodelist; +}; + +%% +input : expr + { + query_tree = $1; + } + ; + +expr : or_expr + ; + +or_expr : and_expr + | or_expr T_OR and_expr + { + $$ = create_node (swis_node_op); + $$->v.op.opcode = swis_op_or; + $$->v.op.arg[0] = $1; + $$->v.op.arg[1] = $3; + } + ; + +and_expr: not_expr + | and_expr and not_expr + { + $$ = create_node (swis_node_op); + $$->v.op.opcode = swis_op_and; + $$->v.op.arg[0] = $1; + $$->v.op.arg[1] = $3; + } + ; + +and : /* empty */ + | T_AND + ; + +not_expr: term + | T_NOT term + { + $$ = create_node (swis_node_op); + $$->v.op.opcode = swis_op_not; + $$->v.op.arg[0] = $2; + } + ; + +term : word + | '"' sequence '"' + { + if ($2.head == $2.tail) + { + $$ = $2.head->v.seq.this; + free ($2.head); + } + else + $$ = $2.head; + } + | '(' expr ')' + { + $$ = $2; + } + ; + +sequence: word + { + $$.head = $$.tail = create_node (swis_node_seq); + $$.head->v.seq.this = $1; + $$.head->v.seq.next = NULL; + } + | sequence word + { + struct swis_node *t = create_node (swis_node_seq); + t->v.seq.this = $2; + t->v.seq.next = NULL; + + $1.tail->v.seq.next = t; + $1.tail = t; + $$ = $1; + } + ; + +word : T_WORD + { + $$ = create_node (swis_node_word); + $$->v.word = $1; + } + ; + +%% +int +yyerror (char *s) +{ + error (1, 0, "%s", s); +} + +struct swis_node * +create_node (enum swis_node_type type) +{ + struct swis_node *np = grecs_zalloc (sizeof np[0]); + np->type = type; + return np; +} diff --git a/src/parse.l b/src/parse.l new file mode 100644 index 0000000..87f0c13 --- /dev/null +++ b/src/parse.l @@ -0,0 +1,81 @@ +%{ +/* This 
file is part of SWIS + Copyright (C) 2007, 2012 Sergey Poznyakoff + + SWIS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + SWIS is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with SWIS. If not, see <http://www.gnu.org/licenses/>. */ + +#include "swis.h" +#include "gram.h" + +static struct +{ + int wordc; + int wordi; + char **wordv; + char *string; + size_t size; +} in; + +#define YY_INPUT(buf,result,max_size) \ + do \ + { \ + result = in.size < max_size ? in.size : max_size; \ + memcpy (buf, in.string, result); \ + in.string += result; \ + in.size -= result; \ + } \ + while (0) + +%} + +%option 8bit +WS [ \t]+ +PR [^ \t\n\"()] +WORD {PR}{PR}* +%s QUOTE +%% +\+ return T_AND; +<INITIAL>\| return T_OR; +- return T_NOT; +{WORD} { + yylval.word = grecs_strdup (yytext); + return T_WORD; +} +<INITIAL>\" { BEGIN (QUOTE); return yytext[0]; } +<QUOTE>\" { BEGIN (INITIAL); return yytext[0]; } +"("|")" return yytext[0]; +. 
; + +%% + +int +yywrap () +{ + if (in.wordi == in.wordc) + return 1; + in.string = in.wordv[in.wordi++]; + in.size = strlen(in.string); + return 0; +} + +void +lex_init (int argc, char **argv) +{ + in.wordc = argc; + in.wordv = argv; + in.wordi = 0; + yywrap(); +} + + diff --git a/src/search-cfg.def b/src/search-cfg.def new file mode 100644 index 0000000..e51c372 --- /dev/null +++ b/src/search-cfg.def @@ -0,0 +1,21 @@ +/* This file is part of SWIS + Copyright (C) 2012 Sergey Poznyakoff + + SWIS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + SWIS is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with SWIS. If not, see <http://www.gnu.org/licenses/>. */ + +SWIS_STMT(`module-load-path', `GRECS_DFLT, &module_load_path') +SWIS_STMT(`prepend-load-path', `GRECS_DFLT, &prepend_load_path') +SWIS_STMT(`backend',`GRECS_DFLT, &backend_name') + +SWIS_CONFIG(`swis-search') diff --git a/src/search-cli.opt b/src/search-cli.opt new file mode 100644 index 0000000..f287295 --- /dev/null +++ b/src/search-cli.opt @@ -0,0 +1,103 @@ +/* This file is part of SWIS + Copyright (C) 2007, 2012 Sergey Poznyakoff + + SWIS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + SWIS is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with SWIS. If not, see <http://www.gnu.org/licenses/>. */ + +#include "wordsplit.h" + +OPTIONS_BEGIN("swis-search", + [<SWIS search utility>], + [<>], + [<gnu>], + [<copyright_year=2012>], + [<copyright_holder=Sergey Poznyakoff>]) + +OPTION(progname,,NAME, + [<use NAME as program name>]) +BEGIN + program_name = optarg; +END + +OPTION(backend,b,NAME, + [<set backend name>]) +BEGIN + OPTNODE("backend", optarg); +END + +OPTION(debug,d,ARG, + [<output debugging info>]) +BEGIN + size_t i; + struct wordsplit ws; + ws.ws_delim = ","; + if (wordsplit (optarg, &ws, WRDSF_DEFFLAGS|WRDSF_DELIM|WRDSF_SHOWERR)) + exit (EX_USAGE); + for (i = 0; i < ws.ws_wordc; i++) + { + if (strcmp (ws.ws_wordv[i], "gram") == 0) + yydebug = 1; + else if (strcmp (ws.ws_wordv[i], "lex") == 0) + yy_flex_debug = 1; + else + { + char *p; + + debug_level = strtol (ws.ws_wordv[i], &p, 10); + if (*p) + error (EX_USAGE, 0, "not a number: %s", ws.ws_wordv[i]); + } + } + wordsplit_free (&ws); +END + +OPTION(config-file,c,FILE, + [<use FILE instead of the default configuration>]) +BEGIN + conffile = optarg; +END + +GROUP(Selecting program mode) + +OPTION(lint,,, + [<parse configuration file and exit>]) +BEGIN + lint_mode = 1; +END + +OPTION(,E,, + [<preprocess config and exit>]) +BEGIN + preprocess_only = 1; +END + +GROUP([<Additional help>]) +OPTION(config-help,,, + [<show configuration file summary>]) +BEGIN + config_help (); + exit (0); +END + +OPTIONS_END + +int +parse_options (int argc, char *argv[]) +{ + int index; + GETOPT(argc, argv, index); + return index; +} + + + diff --git a/src/search.c b/src/search.c new file mode 100644 index 0000000..643c0b6 --- /dev/null +++ b/src/search.c @@ -0,0 +1,146 @@ +/* This file is part of SWIS + Copyright (C) 2007, 2012 Sergey Poznyakoff + + SWIS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free 
Software Foundation; either version 3, or (at your option) + any later version. + + SWIS is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with SWIS. If not, see <http://www.gnu.org/licenses/>. */ + +#include "swis.h" +#include "swis/backend.h" +#include "swis/query.h" +#include "search.h" + +int debug_level; +char *backend_name; +struct swis_backend_module *module; +swis_backend_handle_t bh; + + +void +print_node (struct swis_node *node, int level) +{ + if (!node) + return; + printf ("%*.*s", level, level, " "); + switch (node->type) + { + case swis_node_word: + printf ("WORD %s\n", node->v.word); + break; + + case swis_node_op: + switch (node->v.op.opcode) + { + case swis_op_not: + puts ("NOT"); + break; + + case swis_op_or: + puts ("OR"); + break; + + case swis_op_and: + puts ("AND"); + break; + + default: + abort (); + } + print_node (node->v.op.arg[0], level + 1); + print_node (node->v.op.arg[1], level + 1); + break; + + case swis_node_seq: + puts ("SEQ"); + for (node; node; node = node->v.seq.next) + print_node (node->v.seq.this, level + 1); + break; + + default: + abort (); + } +} + +#include "search-cli.h" +#include "search-cfg.h" + +int +main (int argc, char **argv) +{ + int index, rc; + struct grecs_node *tree, *subtree; + char buf[256]; + + set_program_name (argv[0]); + yydebug = yy_flex_debug = 0; + config_init (); + index = parse_options (argc, argv); + + if (preprocess_only) + exit (grecs_preproc_run (conffile, grecs_preprocessor) ? 
EX_CONFIG : 0); + tree = grecs_parse (conffile); + if (!tree) + exit (EX_CONFIG); + + config_finish (tree); + if (lint_mode) + exit (0); + + argc -= index; + argv += index; + + if (argc) + lex_init (argc, argv); + else + { + char *x[2]; + char *query = getenv ("QUERY_STRING"); + if (!query) + error (1, 0, "no query"); + x[0] = query; + x[1] = NULL; + lex_init (1, x); + } + + if (yyparse ()) + return 1; + + if (debug_level) + { + print_node (query_tree, 0); + return 0; + } + + if (!backend_name) + error (1, 0, "backend name not specified"); + swis_loader_init (); + module = swis_backend_load (backend_name); + if (!module) + exit (EX_UNAVAILABLE); + if (!module->swis_search) + error (1, 0, "backend is not suitable for searching"); + snprintf (buf, sizeof (buf), "backend-config=\"%s\"", backend_name); + subtree = grecs_find_node (tree, buf); + if (subtree) + { + subtree = subtree->down; + } + + bh = module->swis_open (subtree, SWIS_MODE_READ); + if (!bh) + exit (EX_UNAVAILABLE); + + rc = module->swis_search (bh, query_tree); + + module->swis_close (bh); + return !!rc; +} diff --git a/src/search.h b/src/search.h new file mode 100644 index 0000000..cd49c13 --- /dev/null +++ b/src/search.h @@ -0,0 +1,20 @@ +/* This file is part of SWIS + Copyright (C) 2007, 2012 Sergey Poznyakoff + + SWIS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + SWIS is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with SWIS. If not, see <http://www.gnu.org/licenses/>. 
*/ + +void lex_init (int argc, char **argv); +extern int yydebug, yy_flex_debug; +extern struct swis_node *query_tree; + diff --git a/src/store-cli.opt b/src/store-cli.opt index ee84a33..1489250 100644 --- a/src/store-cli.opt +++ b/src/store-cli.opt @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with SWIS. If not, see <http://www.gnu.org/licenses/>. */ -OPTIONS_BEGIN("swis-store", +OPTIONS_BEGIN("store", [<SWIS store backend>], [<[FILE [FILE...]]>], [<gnu>], @@ -63,5 +63,6 @@ int assert_string_arg (grecs_locus_t *locus, enum grecs_callback_command cmd, const grecs_value_t *value); +struct swis_backend_module *swis_backend_load (const char *name); |