aboutsummaryrefslogtreecommitdiff
path: root/src/ellinika/utf8scm.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/ellinika/utf8scm.c')
-rw-r--r--src/ellinika/utf8scm.c149
1 files changed, 149 insertions, 0 deletions
diff --git a/src/ellinika/utf8scm.c b/src/ellinika/utf8scm.c
new file mode 100644
index 0000000..89f5fba
--- /dev/null
+++ b/src/ellinika/utf8scm.c
@@ -0,0 +1,149 @@
+/* This file is part of Ellinika project.
+ Copyright (C) 2011 Sergey Poznyakoff
+
+ Ellinika is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ Ellinika is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+#include <errno.h>
+#include <stdlib.h>
+#include <libguile.h>
+#include "utf8.h"
+
+
+SCM_DEFINE_PUBLIC(scm_utf8_toupper, "utf8-toupper", 1, 0, 0,
+ (SCM string),
+"Convert STRING to uppercase\n")
+#define FUNC_NAME s_scm_utf8_toupper
+{
+ char *str;
+ SCM scm;
+
+ SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
+ str = scm_to_locale_string(string);
+ if (utf8_toupper(str, strlen(str)))
+ scm_misc_error(FUNC_NAME,
+ "cannot convert to upper case: ~S",
+ scm_list_1(string));
+ scm = scm_from_locale_string(str);
+ free(str);
+ return scm;
+}
+#undef FUNC_NAME
+
+SCM_DEFINE_PUBLIC(scm_utf8_tolower, "utf8-tolower", 1, 0, 0,
+ (SCM string),
+"Convert STRING to lowercase\n")
+#define FUNC_NAME s_scm_utf8_tolower
+{
+ char *str;
+ SCM scm;
+
+ SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
+ str = scm_to_locale_string(string);
+ if (utf8_tolower(str, strlen(str)))
+ scm_misc_error(FUNC_NAME,
+ "cannot convert to lower case: ~S",
+ scm_list_1(string));
+ scm = scm_from_locale_string(str);
+ free(str);
+ return scm;
+}
+#undef FUNC_NAME
+
+static int
+memberof(unsigned *w, size_t len, unsigned c)
+{
+ while (len--)
+ if (*w++ == c)
+ return 1;
+ return 0;
+}
+
+SCM_DEFINE_PUBLIC(scm_utf8_escape, "utf8-escape", 1, 1, 0,
+ (SCM string, SCM escapable),
+"Prefix with \\ each occurrence of ESCAPABLE chars in STRING\n")
+#define FUNC_NAME s_scm_utf8_escape
+{
+ SCM scm;
+ unsigned *escptr, *escbase;
+ size_t esclen;
+ char *s;
+ unsigned *wptr, *nptr;
+ size_t wlen;
+ size_t incr, i;
+
+ SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
+ s = scm_to_locale_string(string);
+
+ if (utf8_mbstr_to_wc(s, &wptr, &wlen))
+ scm_misc_error(FUNC_NAME,
+ "cannot convert ~S to UTF-8",
+ scm_list_1(string));
+ free(s);
+
+ if (SCM_UNBNDP(escapable)) {
+ static unsigned default_escapable[] = { '\"', '\\' };
+ escbase = NULL;
+ escptr = default_escapable;
+ esclen = 2;
+ } else {
+ SCM_ASSERT(scm_is_string(escapable), escapable,
+ SCM_ARG2, FUNC_NAME);
+ s = scm_to_locale_string(escapable);
+ if (utf8_mbstr_to_wc(s, &escbase, &esclen)) {
+ free(wptr);
+ scm_misc_error(FUNC_NAME,
+ "cannot convert ~S to UTF-8",
+ scm_list_1(escapable));
+ }
+ escptr = escbase;
+ free(s);
+ }
+
+ incr = 0;
+ for (i = 0; i < wlen; i++)
+ if (memberof(escptr, esclen, wptr[i]))
+ incr++;
+
+
+ nptr = calloc(sizeof(nptr[0]), wlen + incr);
+ if (!nptr)
+ scm_memory_error(FUNC_NAME);
+
+ for (i = incr = 0; i < wlen; i++) {
+ if (memberof(escptr, esclen, wptr[i]))
+ nptr[i + incr++] = '\\';
+ nptr[i + incr] = wptr[i];
+ }
+
+ free(wptr);
+ free(escbase);
+
+ if (utf8_wc_to_mbstr(nptr, wlen + incr, &s))
+ scm_misc_error(FUNC_NAME,
+ "cannot convert UTF-8 to Scheme",
+ SCM_EOL);
+ scm = scm_from_locale_string(s);
+ free(s);
+ return scm;
+}
+#undef FUNC_NAME
+
+void
+elmorph_utf8scm_init()
+{
+#include "utf8scm.x"
+}

Return to:

Send suggestions and report system problems to the System administrator.