1 files changed, 149 insertions, 0 deletions
diff --git a/src/ellinika/utf8scm.c b/src/ellinika/utf8scm.c
new file mode 100644
index 0000000..89f5fba
--- /dev/null
+++ b/src/ellinika/utf8scm.c
@@ -0,0 +1,149 @@
+/* This file is part of Ellinika project.
+   Copyright (C) 2011 Sergey Poznyakoff
+
+   Ellinika is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Ellinika is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+#include <errno.h>
+#include <stdlib.h>
+#include <libguile.h>
+#include "utf8.h"
+
+
+SCM_DEFINE_PUBLIC(scm_utf8_toupper, "utf8-toupper", 1, 0, 0,
+		  (SCM string),
+"Convert STRING to uppercase\n")
+#define FUNC_NAME s_scm_utf8_toupper
+{
+	char *str;
+	SCM scm;
+	
+	SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
+	str = scm_to_locale_string(string);
+	if (utf8_toupper(str, strlen(str)))
+		scm_misc_error(FUNC_NAME,
+			       "cannot convert to upper case: ~S",
+			       scm_list_1(string));
+	scm = scm_from_locale_string(str);
+	free(str);
+	return scm;
+}
+#undef FUNC_NAME
+
+SCM_DEFINE_PUBLIC(scm_utf8_tolower, "utf8-tolower", 1, 0, 0,
+		  (SCM string),
+"Convert STRING to lowercase\n")
+#define FUNC_NAME s_scm_utf8_tolower
+{
+	char *str;
+	SCM scm;
+	
+	SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
+	str = scm_to_locale_string(string);
+	if (utf8_tolower(str, strlen(str)))
+		scm_misc_error(FUNC_NAME,
+			       "cannot convert to lower case: ~S",
+			       scm_list_1(string));
+	scm = scm_from_locale_string(str);
+	free(str);
+	return scm;
+}
+#undef FUNC_NAME
+
+static int
+memberof(unsigned *w, size_t len, unsigned c)
+{
+	while (len--)
+		if (*w++ == c)
+			return 1;
+	return 0;
+}
+
+SCM_DEFINE_PUBLIC(scm_utf8_escape, "utf8-escape", 1, 1, 0,
+		  (SCM string, SCM escapable),
+"Prefix with \\ each occurrence of ESCAPABLE chars in STRING\n")
+#define FUNC_NAME s_scm_utf8_escape
+{
+	SCM scm;
+	unsigned *escptr, *escbase;
+	size_t esclen;
+	char *s;	
+	unsigned *wptr, *nptr;
+	size_t wlen;
+	size_t incr, i;
+	
+	SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
+	s = scm_to_locale_string(string);
+
+	if (utf8_mbstr_to_wc(s, &wptr, &wlen))
+		scm_misc_error(FUNC_NAME,
+			       "cannot convert ~S to UTF-8",
+			       scm_list_1(string));
+	free(s);
+	
+	if (SCM_UNBNDP(escapable)) {
+		static unsigned default_escapable[] = { '\"', '\\' };
+		escbase = NULL;
+		escptr = default_escapable;
+		esclen = 2;
+	} else {
+		SCM_ASSERT(scm_is_string(escapable), escapable,
+			   SCM_ARG2, FUNC_NAME);
+		s = scm_to_locale_string(escapable);
+		if (utf8_mbstr_to_wc(s, &escbase, &esclen)) {
+			free(wptr);
+			scm_misc_error(FUNC_NAME,
+				       "cannot convert ~S to UTF-8",
+				       scm_list_1(escapable));
+		}
+		escptr = escbase;
+		free(s);
+	}
+
+	incr = 0;
+	for (i = 0; i < wlen; i++)
+		if (memberof(escptr, esclen, wptr[i]))
+			incr++;
+	
+
+	nptr = calloc(sizeof(nptr[0]), wlen + incr);
+	if (!nptr)
+		scm_memory_error(FUNC_NAME);
+
+	for (i = incr = 0; i < wlen; i++) {
+		if (memberof(escptr, esclen, wptr[i]))
+			nptr[i + incr++] = '\\';
+		nptr[i + incr] = wptr[i];
+	}
+
+	free(wptr);
+	free(escbase);
+
+	if (utf8_wc_to_mbstr(nptr, wlen + incr, &s))
+		scm_misc_error(FUNC_NAME,
+			       "cannot convert UTF-8 to Scheme",
+			       SCM_EOL);
+	scm = scm_from_locale_string(s);
+	free(s);
+	return scm;
+}
+#undef FUNC_NAME
+
+void
+elmorph_utf8scm_init()
+{
+#include "utf8scm.x"
+}