diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2011-06-13 14:05:13 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2011-06-14 02:27:27 +0300 |
commit | ce29f168ed52f08228b99a789785271a4a3c9b0e (patch) | |
tree | acdcf09cc887bc508d8d803ad971f96e9da442be /src/ellinika | |
parent | bee3becef44e298f59d72cee3c8e552bccb10d65 (diff) | |
download | ellinika-ce29f168ed52f08228b99a789785271a4a3c9b0e.tar.gz ellinika-ce29f168ed52f08228b99a789785271a4a3c9b0e.tar.bz2 |
Implement escape function.
Diffstat (limited to 'src/ellinika')
-rw-r--r-- | src/ellinika/.gitignore | 1 | ||||
-rw-r--r-- | src/ellinika/Makefile.am | 6 | ||||
-rw-r--r-- | src/ellinika/elmorph.c | 41 | ||||
-rw-r--r-- | src/ellinika/elmorph.h | 2 | ||||
-rw-r--r-- | src/ellinika/sql.scm | 33 | ||||
-rw-r--r-- | src/ellinika/tests/escape.scm | 6 | ||||
-rw-r--r-- | src/ellinika/utf8scm.c | 149 |
7 files changed, 196 insertions, 42 deletions
diff --git a/src/ellinika/.gitignore b/src/ellinika/.gitignore index 11bf478..2c9a340 100644 --- a/src/ellinika/.gitignore +++ b/src/ellinika/.gitignore @@ -5,3 +5,4 @@ elmorph.scm elmorph.x phoneme.c phoneme.h +utf8scm.x diff --git a/src/ellinika/Makefile.am b/src/ellinika/Makefile.am index b8988d4..581a537 100644 --- a/src/ellinika/Makefile.am +++ b/src/ellinika/Makefile.am @@ -22,7 +22,8 @@ guile_DATA=\ config.scm\ dico.scm\ elmorph.scm\ - tenses.scm + tenses.scm\ + sql.scm cgi.m4: Makefile echo 'divert(-1)' > $@ @@ -57,13 +58,14 @@ pkglib_LTLIBRARIES=libelmorph.la libelmorph_la_SOURCES = \ aorist.c\ utf8.c\ + utf8scm.c\ elchr.c\ elmorph.c\ elmorph.h\ phoneme.y\ syllabificator.c -DOT_X_FILES = elmorph.x +DOT_X_FILES = elmorph.x utf8scm.x BUILT_SOURCES = $(DOT_X_FILES) diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c index 6d6a69a..56bead3 100644 --- a/src/ellinika/elmorph.c +++ b/src/ellinika/elmorph.c @@ -769,46 +769,6 @@ SCM_DEFINE_PUBLIC(scm_elstr_char_phoneme, "elstr-char-phoneme", } #undef FUNC_NAME -SCM_DEFINE_PUBLIC(scm_utf8_toupper, "utf8-toupper", 1, 0, 0, - (SCM string), -"Convert STRING to uppercase\n") -#define FUNC_NAME s_scm_utf8_toupper -{ - char *str; - SCM scm; - - SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); - str = scm_to_locale_string(string); - if (utf8_toupper(str, strlen(str))) - scm_misc_error(FUNC_NAME, - "cannot convert to upper case: ~S", - scm_list_1(string)); - scm = scm_from_locale_string(str); - free(str); - return scm; -} -#undef FUNC_NAME - -SCM_DEFINE_PUBLIC(scm_utf8_tolower, "utf8-tolower", 1, 0, 0, - (SCM string), -"Convert STRING to lowercase\n") -#define FUNC_NAME s_scm_utf8_tolower -{ - char *str; - SCM scm; - - SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); - str = scm_to_locale_string(string); - if (utf8_tolower(str, strlen(str))) - scm_misc_error(FUNC_NAME, - "cannot convert to lower case: ~S", - scm_list_1(string)); - scm = scm_from_locale_string(str); - free(str); - return scm; -} -#undef FUNC_NAME - static SCM _elstr_thema_aoristoy(SCM el, int destructive, const char *func_name) { @@ -1036,4 +996,5 @@ scm_init_ellinika_elmorph_module() scm_c_export(deftab[i].sym, NULL); } #include "elmorph.x" + elmorph_utf8scm_init(); } diff --git a/src/ellinika/elmorph.h b/src/ellinika/elmorph.h index c0df60f..83e1c83 100644 --- a/src/ellinika/elmorph.h +++ b/src/ellinika/elmorph.h @@ -137,3 +137,5 @@ int phoneme_map(struct phoneme **pph, size_t *plen, unsigned *word, size_t len); int syllable_map(struct syllable **psyl, size_t *plen, struct phoneme *phon, size_t nphon); + +void elmorph_utf8scm_init(void); diff --git a/src/ellinika/sql.scm b/src/ellinika/sql.scm new file mode 100644 index 0000000..c0301ad --- /dev/null +++ b/src/ellinika/sql.scm @@ -0,0 +1,33 @@ +(define-module (ellinika sql)) + +(use-modules (srfi srfi-1) + (ellinika elmorph) + (gamma sql)) + +(define ellinika:sql-verbose #f) +(define ellinika:sql-conn #f) + +(define (ellinika:sql-connect arg) + (set! ellinika:sql-conn (sql-open-connection args)) + (if ellinika:sql-conn + (sql-query ellinika:sql-conn "SET NAMES utf8")) + ellinika:sql-conn) + +(define (ellinika:sql-disconnect) + (if ellinika:sql-conn (sql-close-connection ellinika:sql-conn))) + +(define (ellinika:sql-query format . rest) + (let ((query (apply format #f + (map (lambda (arg) + (if arg + "NULL" + (utf8-escape arg))) + rest)))) + (cond + (ellinika:sql-verbose + (format #f "QUERY: ~A~%" query) + (let ((res (sql-query ellinika:sql-conn query))) + (format #f "RES: ~A~%" res) + res)) + (else + (sql-query ellinika:sql-conn query))))) diff --git a/src/ellinika/tests/escape.scm b/src/ellinika/tests/escape.scm new file mode 100644 index 0000000..841e496 --- /dev/null +++ b/src/ellinika/tests/escape.scm @@ -0,0 +1,6 @@ +(load-extension "./libelmorph" "scm_init_ellinika_elmorph_module") + +(display (utf8-escape "ο \"παρα\\κείμενος\"")) +(newline) +(display (utf8-escape "ο \"παρα\\κείμενος\"" "ία" )) +(newline) diff --git a/src/ellinika/utf8scm.c b/src/ellinika/utf8scm.c new file mode 100644 index 0000000..89f5fba --- /dev/null +++ b/src/ellinika/utf8scm.c @@ -0,0 +1,149 @@ +/* This file is part of Ellinika project. + Copyright (C) 2011 Sergey Poznyakoff + + Ellinika is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Ellinika is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif +#include <errno.h> +#include <stdlib.h> +#include <libguile.h> +#include "utf8.h" + + +SCM_DEFINE_PUBLIC(scm_utf8_toupper, "utf8-toupper", 1, 0, 0, + (SCM string), +"Convert STRING to uppercase\n") +#define FUNC_NAME s_scm_utf8_toupper +{ + char *str; + SCM scm; + + SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); + str = scm_to_locale_string(string); + if (utf8_toupper(str, strlen(str))) + scm_misc_error(FUNC_NAME, + "cannot convert to upper case: ~S", + scm_list_1(string)); + scm = scm_from_locale_string(str); + free(str); + return scm; +} +#undef FUNC_NAME + +SCM_DEFINE_PUBLIC(scm_utf8_tolower, "utf8-tolower", 1, 0, 0, + (SCM string), +"Convert STRING to lowercase\n") +#define FUNC_NAME s_scm_utf8_tolower +{ + char *str; + SCM scm; + + SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); + str = scm_to_locale_string(string); + if (utf8_tolower(str, strlen(str))) + scm_misc_error(FUNC_NAME, + "cannot convert to lower case: ~S", + scm_list_1(string)); + scm = scm_from_locale_string(str); + free(str); + return scm; +} +#undef FUNC_NAME + +static int +memberof(unsigned *w, size_t len, unsigned c) +{ + while (len--) + if (*w++ == c) + return 1; + return 0; +} + +SCM_DEFINE_PUBLIC(scm_utf8_escape, "utf8-escape", 1, 1, 0, + (SCM string, SCM escapable), +"Prefix with \\ each occurrence of ESCAPABLE chars in STRING\n") +#define FUNC_NAME s_scm_utf8_escape +{ + SCM scm; + unsigned *escptr, *escbase; + size_t esclen; + char *s; + unsigned *wptr, *nptr; + size_t wlen; + size_t incr, i; + + SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); + s = scm_to_locale_string(string); + + if (utf8_mbstr_to_wc(s, &wptr, &wlen)) + scm_misc_error(FUNC_NAME, + "cannot convert ~S to UTF-8", + scm_list_1(string)); + free(s); + + if (SCM_UNBNDP(escapable)) { + static unsigned default_escapable[] = { '\"', '\\' }; + escbase = NULL; + escptr = default_escapable; + esclen = 2; + } else { + SCM_ASSERT(scm_is_string(escapable), escapable, + SCM_ARG2, FUNC_NAME); + s = scm_to_locale_string(escapable); + if (utf8_mbstr_to_wc(s, &escbase, &esclen)) { + free(wptr); + scm_misc_error(FUNC_NAME, + "cannot convert ~S to UTF-8", + scm_list_1(escapable)); + } + escptr = escbase; + free(s); + } + + incr = 0; + for (i = 0; i < wlen; i++) + if (memberof(escptr, esclen, wptr[i])) + incr++; + + + nptr = calloc(sizeof(nptr[0]), wlen + incr); + if (!nptr) + scm_memory_error(FUNC_NAME); + + for (i = incr = 0; i < wlen; i++) { + if (memberof(escptr, esclen, wptr[i])) + nptr[i + incr++] = '\\'; + nptr[i + incr] = wptr[i]; + } + + free(wptr); + free(escbase); + + if (utf8_wc_to_mbstr(nptr, wlen + incr, &s)) + scm_misc_error(FUNC_NAME, + "cannot convert UTF-8 to Scheme", + SCM_EOL); + scm = scm_from_locale_string(s); + free(s); + return scm; +} +#undef FUNC_NAME + +void +elmorph_utf8scm_init() +{ +#include "utf8scm.x" +} |