/* This file is part of Ellinika project. Copyright (C) 2011 Sergey Poznyakoff Ellinika is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. Ellinika is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifdef HAVE_CONFIG_H # include #endif #include #include #include #include "utf8.h" SCM_DEFINE_PUBLIC(scm_utf8_toupper, "utf8-toupper", 1, 0, 0, (SCM string), "Convert STRING to uppercase\n") #define FUNC_NAME s_scm_utf8_toupper { char *str; SCM scm; SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); str = scm_to_locale_string(string); if (utf8_toupper(str, strlen(str))) scm_misc_error(FUNC_NAME, "cannot convert to upper case: ~A", scm_list_1(string)); scm = scm_from_locale_string(str); free(str); return scm; } #undef FUNC_NAME SCM_DEFINE_PUBLIC(scm_utf8_tolower, "utf8-tolower", 1, 0, 0, (SCM string), "Convert STRING to lowercase\n") #define FUNC_NAME s_scm_utf8_tolower { char *str; SCM scm; SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); str = scm_to_locale_string(string); if (utf8_tolower(str, strlen(str))) scm_misc_error(FUNC_NAME, "cannot convert to lower case: ~A", scm_list_1(string)); scm = scm_from_locale_string(str); free(str); return scm; } #undef FUNC_NAME static int memberof(unsigned *w, size_t len, unsigned c) { while (len--) if (*w++ == c) return 1; return 0; } SCM_DEFINE_PUBLIC(scm_utf8_escape, "utf8-escape", 1, 1, 0, (SCM string, SCM escapable), "Prefix with \\ each occurrence of ESCAPABLE chars in STRING\n") #define FUNC_NAME s_scm_utf8_escape { SCM scm; unsigned *escptr, *escbase; size_t esclen; char *s; unsigned *wptr, *nptr; size_t wlen; size_t incr, i; SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); s = scm_to_locale_string(string); if (utf8_mbstr_to_wc(s, &wptr, &wlen)) scm_misc_error(FUNC_NAME, "cannot convert ~A to UTF-8", scm_list_1(string)); free(s); if (SCM_UNBNDP(escapable)) { static unsigned default_escapable[] = { '\"', '\\' }; escbase = NULL; escptr = default_escapable; esclen = 2; } else { SCM_ASSERT(scm_is_string(escapable), escapable, SCM_ARG2, FUNC_NAME); s = scm_to_locale_string(escapable); if (utf8_mbstr_to_wc(s, &escbase, &esclen)) { free(wptr); scm_misc_error(FUNC_NAME, "cannot convert ~A to UTF-8", scm_list_1(escapable)); } escptr = escbase; free(s); } incr = 0; for (i = 0; i < wlen; i++) if (memberof(escptr, esclen, wptr[i])) incr++; nptr = calloc(sizeof(nptr[0]), wlen + incr); if (!nptr) scm_memory_error(FUNC_NAME); for (i = incr = 0; i < wlen; i++) { if (memberof(escptr, esclen, wptr[i])) nptr[i + incr++] = '\\'; nptr[i + incr] = wptr[i]; } free(wptr); free(escbase); if (utf8_wc_to_mbstr(nptr, wlen + incr, &s)) scm_misc_error(FUNC_NAME, "cannot convert UTF-8 to Scheme", SCM_EOL); scm = scm_from_locale_string(s); free(s); return scm; } #undef FUNC_NAME void elmorph_utf8scm_init() { #include "utf8scm.x" }