summaryrefslogtreecommitdiffabout
path: root/src/ellinika
authorSergey Poznyakoff <gray@gnu.org.ua>2011-06-13 11:05:13 (GMT)
committer Sergey Poznyakoff <gray@gnu.org.ua>2011-06-13 23:27:27 (GMT)
commitce29f168ed52f08228b99a789785271a4a3c9b0e (patch) (side-by-side diff)
treeacdcf09cc887bc508d8d803ad971f96e9da442be /src/ellinika
parentbee3becef44e298f59d72cee3c8e552bccb10d65 (diff)
downloadellinika-ce29f168ed52f08228b99a789785271a4a3c9b0e.tar.gz
ellinika-ce29f168ed52f08228b99a789785271a4a3c9b0e.tar.bz2
Implement escape function.
Diffstat (limited to 'src/ellinika') (more/less context) (ignore whitespace changes)
-rw-r--r--src/ellinika/.gitignore1
-rw-r--r--src/ellinika/Makefile.am6
-rw-r--r--src/ellinika/elmorph.c41
-rw-r--r--src/ellinika/elmorph.h2
-rw-r--r--src/ellinika/sql.scm33
-rw-r--r--src/ellinika/tests/escape.scm6
-rw-r--r--src/ellinika/utf8scm.c149
7 files changed, 196 insertions, 42 deletions
diff --git a/src/ellinika/.gitignore b/src/ellinika/.gitignore
index 11bf478..2c9a340 100644
--- a/src/ellinika/.gitignore
+++ b/src/ellinika/.gitignore
@@ -5,3 +5,4 @@ elmorph.scm
elmorph.x
phoneme.c
phoneme.h
+utf8scm.x
diff --git a/src/ellinika/Makefile.am b/src/ellinika/Makefile.am
index b8988d4..581a537 100644
--- a/src/ellinika/Makefile.am
+++ b/src/ellinika/Makefile.am
@@ -22,7 +22,8 @@ guile_DATA=\
config.scm\
dico.scm\
elmorph.scm\
- tenses.scm
+ tenses.scm\
+ sql.scm
cgi.m4: Makefile
echo 'divert(-1)' > $@
@@ -57,13 +58,14 @@ pkglib_LTLIBRARIES=libelmorph.la
libelmorph_la_SOURCES = \
aorist.c\
utf8.c\
+ utf8scm.c\
elchr.c\
elmorph.c\
elmorph.h\
phoneme.y\
syllabificator.c
-DOT_X_FILES = elmorph.x
+DOT_X_FILES = elmorph.x utf8scm.x
BUILT_SOURCES = $(DOT_X_FILES)
diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c
index 6d6a69a..56bead3 100644
--- a/src/ellinika/elmorph.c
+++ b/src/ellinika/elmorph.c
@@ -769,46 +769,6 @@ SCM_DEFINE_PUBLIC(scm_elstr_char_phoneme, "elstr-char-phoneme",
}
#undef FUNC_NAME
-SCM_DEFINE_PUBLIC(scm_utf8_toupper, "utf8-toupper", 1, 0, 0,
- (SCM string),
-"Convert STRING to uppercase\n")
-#define FUNC_NAME s_scm_utf8_toupper
-{
- char *str;
- SCM scm;
-
- SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
- str = scm_to_locale_string(string);
- if (utf8_toupper(str, strlen(str)))
- scm_misc_error(FUNC_NAME,
- "cannot convert to upper case: ~S",
- scm_list_1(string));
- scm = scm_from_locale_string(str);
- free(str);
- return scm;
-}
-#undef FUNC_NAME
-
-SCM_DEFINE_PUBLIC(scm_utf8_tolower, "utf8-tolower", 1, 0, 0,
- (SCM string),
-"Convert STRING to lowercase\n")
-#define FUNC_NAME s_scm_utf8_tolower
-{
- char *str;
- SCM scm;
-
- SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
- str = scm_to_locale_string(string);
- if (utf8_tolower(str, strlen(str)))
- scm_misc_error(FUNC_NAME,
- "cannot convert to lower case: ~S",
- scm_list_1(string));
- scm = scm_from_locale_string(str);
- free(str);
- return scm;
-}
-#undef FUNC_NAME
-
static SCM
_elstr_thema_aoristoy(SCM el, int destructive, const char *func_name)
{
@@ -1036,4 +996,5 @@ scm_init_ellinika_elmorph_module()
scm_c_export(deftab[i].sym, NULL);
}
#include "elmorph.x"
+ elmorph_utf8scm_init();
}
diff --git a/src/ellinika/elmorph.h b/src/ellinika/elmorph.h
index c0df60f..83e1c83 100644
--- a/src/ellinika/elmorph.h
+++ b/src/ellinika/elmorph.h
@@ -137,3 +137,5 @@ int phoneme_map(struct phoneme **pph, size_t *plen,
unsigned *word, size_t len);
int syllable_map(struct syllable **psyl, size_t *plen,
struct phoneme *phon, size_t nphon);
+
+void elmorph_utf8scm_init(void);
diff --git a/src/ellinika/sql.scm b/src/ellinika/sql.scm
new file mode 100644
index 0000000..c0301ad
--- a/dev/null
+++ b/src/ellinika/sql.scm
@@ -0,0 +1,33 @@
+(define-module (ellinika sql))
+
+(use-modules (srfi srfi-1)
+ (ellinika elmorph)
+ (gamma sql))
+
+(define ellinika:sql-verbose #f)
+(define ellinika:sql-conn #f)
+
+(define (ellinika:sql-connect arg)
+ (set! ellinika:sql-conn (sql-open-connection args))
+ (if ellinika:sql-conn
+ (sql-query ellinika:sql-conn "SET NAMES utf8"))
+ ellinika:sql-conn)
+
+(define (ellinika:sql-disconnect)
+ (if ellinika:sql-conn (sql-close-connection ellinika:sql-conn)))
+
+(define (ellinika:sql-query format . rest)
+ (let ((query (apply format #f
+ (map (lambda (arg)
+ (if arg
+ "NULL"
+ (utf8-escape arg)))
+ rest))))
+ (cond
+ (ellinika:sql-verbose
+ (format #f "QUERY: ~A~%" query)
+ (let ((res (sql-query ellinika:sql-conn query)))
+ (format #f "RES: ~A~%" res)
+ res))
+ (else
+ (sql-query ellinika:sql-conn query)))))
diff --git a/src/ellinika/tests/escape.scm b/src/ellinika/tests/escape.scm
new file mode 100644
index 0000000..841e496
--- a/dev/null
+++ b/src/ellinika/tests/escape.scm
@@ -0,0 +1,6 @@
+(load-extension "./libelmorph" "scm_init_ellinika_elmorph_module")
+
+(display (utf8-escape "ο \"παρα\\κείμενος\""))
+(newline)
+(display (utf8-escape "ο \"παρα\\κείμενος\"" "ία" ))
+(newline)
diff --git a/src/ellinika/utf8scm.c b/src/ellinika/utf8scm.c
new file mode 100644
index 0000000..89f5fba
--- a/dev/null
+++ b/src/ellinika/utf8scm.c
@@ -0,0 +1,149 @@
+/* This file is part of Ellinika project.
+ Copyright (C) 2011 Sergey Poznyakoff
+
+ Ellinika is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ Ellinika is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+#include <errno.h>
+#include <stdlib.h>
+#include <libguile.h>
+#include "utf8.h"
+
+
+SCM_DEFINE_PUBLIC(scm_utf8_toupper, "utf8-toupper", 1, 0, 0,
+ (SCM string),
+"Convert STRING to uppercase\n")
+#define FUNC_NAME s_scm_utf8_toupper
+{
+ char *str;
+ SCM scm;
+
+ SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
+ str = scm_to_locale_string(string);
+ if (utf8_toupper(str, strlen(str)))
+ scm_misc_error(FUNC_NAME,
+ "cannot convert to upper case: ~S",
+ scm_list_1(string));
+ scm = scm_from_locale_string(str);
+ free(str);
+ return scm;
+}
+#undef FUNC_NAME
+
+SCM_DEFINE_PUBLIC(scm_utf8_tolower, "utf8-tolower", 1, 0, 0,
+ (SCM string),
+"Convert STRING to lowercase\n")
+#define FUNC_NAME s_scm_utf8_tolower
+{
+ char *str;
+ SCM scm;
+
+ SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
+ str = scm_to_locale_string(string);
+ if (utf8_tolower(str, strlen(str)))
+ scm_misc_error(FUNC_NAME,
+ "cannot convert to lower case: ~S",
+ scm_list_1(string));
+ scm = scm_from_locale_string(str);
+ free(str);
+ return scm;
+}
+#undef FUNC_NAME
+
+static int
+memberof(unsigned *w, size_t len, unsigned c)
+{
+ while (len--)
+ if (*w++ == c)
+ return 1;
+ return 0;
+}
+
+SCM_DEFINE_PUBLIC(scm_utf8_escape, "utf8-escape", 1, 1, 0,
+ (SCM string, SCM escapable),
+"Prefix with \\ each occurrence of ESCAPABLE chars in STRING\n")
+#define FUNC_NAME s_scm_utf8_escape
+{
+ SCM scm;
+ unsigned *escptr, *escbase;
+ size_t esclen;
+ char *s;
+ unsigned *wptr, *nptr;
+ size_t wlen;
+ size_t incr, i;
+
+ SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
+ s = scm_to_locale_string(string);
+
+ if (utf8_mbstr_to_wc(s, &wptr, &wlen))
+ scm_misc_error(FUNC_NAME,
+ "cannot convert ~S to UTF-8",
+ scm_list_1(string));
+ free(s);
+
+ if (SCM_UNBNDP(escapable)) {
+ static unsigned default_escapable[] = { '\"', '\\' };
+ escbase = NULL;
+ escptr = default_escapable;
+ esclen = 2;
+ } else {
+ SCM_ASSERT(scm_is_string(escapable), escapable,
+ SCM_ARG2, FUNC_NAME);
+ s = scm_to_locale_string(escapable);
+ if (utf8_mbstr_to_wc(s, &escbase, &esclen)) {
+ free(wptr);
+ scm_misc_error(FUNC_NAME,
+ "cannot convert ~S to UTF-8",
+ scm_list_1(escapable));
+ }
+ escptr = escbase;
+ free(s);
+ }
+
+ incr = 0;
+ for (i = 0; i < wlen; i++)
+ if (memberof(escptr, esclen, wptr[i]))
+ incr++;
+
+
+ nptr = calloc(sizeof(nptr[0]), wlen + incr);
+ if (!nptr)
+ scm_memory_error(FUNC_NAME);
+
+ for (i = incr = 0; i < wlen; i++) {
+ if (memberof(escptr, esclen, wptr[i]))
+ nptr[i + incr++] = '\\';
+ nptr[i + incr] = wptr[i];
+ }
+
+ free(wptr);
+ free(escbase);
+
+ if (utf8_wc_to_mbstr(nptr, wlen + incr, &s))
+ scm_misc_error(FUNC_NAME,
+ "cannot convert UTF-8 to Scheme",
+ SCM_EOL);
+ scm = scm_from_locale_string(s);
+ free(s);
+ return scm;
+}
+#undef FUNC_NAME
+
+void
+elmorph_utf8scm_init()
+{
+#include "utf8scm.x"
+}

Return to:

Send suggestions and report system problems to the System administrator.