/* This file is part of Ellinika project.
Copyright (C) 2011 Sergey Poznyakoff
Ellinika is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
Ellinika is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
#ifdef HAVE_CONFIG_H
# include
#endif
#include
#include
#include
#include "utf8.h"
SCM_DEFINE_PUBLIC(scm_utf8_toupper, "utf8-toupper", 1, 0, 0,
(SCM string),
"Convert STRING to uppercase\n")
#define FUNC_NAME s_scm_utf8_toupper
{
char *str;
SCM scm;
SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
str = scm_to_locale_string(string);
if (utf8_toupper(str, strlen(str)))
scm_misc_error(FUNC_NAME,
"cannot convert to upper case: ~S",
scm_list_1(string));
scm = scm_from_locale_string(str);
free(str);
return scm;
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_utf8_tolower, "utf8-tolower", 1, 0, 0,
(SCM string),
"Convert STRING to lowercase\n")
#define FUNC_NAME s_scm_utf8_tolower
{
char *str;
SCM scm;
SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
str = scm_to_locale_string(string);
if (utf8_tolower(str, strlen(str)))
scm_misc_error(FUNC_NAME,
"cannot convert to lower case: ~S",
scm_list_1(string));
scm = scm_from_locale_string(str);
free(str);
return scm;
}
#undef FUNC_NAME
static int
memberof(unsigned *w, size_t len, unsigned c)
{
while (len--)
if (*w++ == c)
return 1;
return 0;
}
SCM_DEFINE_PUBLIC(scm_utf8_escape, "utf8-escape", 1, 1, 0,
(SCM string, SCM escapable),
"Prefix with \\ each occurrence of ESCAPABLE chars in STRING\n")
#define FUNC_NAME s_scm_utf8_escape
{
SCM scm;
unsigned *escptr, *escbase;
size_t esclen;
char *s;
unsigned *wptr, *nptr;
size_t wlen;
size_t incr, i;
SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
s = scm_to_locale_string(string);
if (utf8_mbstr_to_wc(s, &wptr, &wlen))
scm_misc_error(FUNC_NAME,
"cannot convert ~S to UTF-8",
scm_list_1(string));
free(s);
if (SCM_UNBNDP(escapable)) {
static unsigned default_escapable[] = { '\"', '\\' };
escbase = NULL;
escptr = default_escapable;
esclen = 2;
} else {
SCM_ASSERT(scm_is_string(escapable), escapable,
SCM_ARG2, FUNC_NAME);
s = scm_to_locale_string(escapable);
if (utf8_mbstr_to_wc(s, &escbase, &esclen)) {
free(wptr);
scm_misc_error(FUNC_NAME,
"cannot convert ~S to UTF-8",
scm_list_1(escapable));
}
escptr = escbase;
free(s);
}
incr = 0;
for (i = 0; i < wlen; i++)
if (memberof(escptr, esclen, wptr[i]))
incr++;
nptr = calloc(sizeof(nptr[0]), wlen + incr);
if (!nptr)
scm_memory_error(FUNC_NAME);
for (i = incr = 0; i < wlen; i++) {
if (memberof(escptr, esclen, wptr[i]))
nptr[i + incr++] = '\\';
nptr[i + incr] = wptr[i];
}
free(wptr);
free(escbase);
if (utf8_wc_to_mbstr(nptr, wlen + incr, &s))
scm_misc_error(FUNC_NAME,
"cannot convert UTF-8 to Scheme",
SCM_EOL);
scm = scm_from_locale_string(s);
free(s);
return scm;
}
#undef FUNC_NAME
void
elmorph_utf8scm_init()
{
#include "utf8scm.x"
}