/* This file is part of Ellinika project.
   Copyright (C) 2011 Sergey Poznyakoff

   Ellinika is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   Ellinika is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <libguile.h>
#include "utf8.h"


/* Scheme primitive (utf8-toupper STRING).

   Converts STRING to a freshly allocated C string, hands it to
   utf8_toupper() and returns the resulting buffer as a new Scheme string.
   A non-zero return from utf8_toupper() is reported as a misc-error.

   The C copy is registered with the dynwind context, so it is released
   both on normal return and when any of the calls below exits
   non-locally (scm_misc_error never returns).

   NOTE(review): the C string is in the locale encoding while the helper
   presumably expects UTF-8 -- confirm that callers run in a UTF-8 locale. */
SCM_DEFINE_PUBLIC(scm_utf8_toupper, "utf8-toupper", 1, 0, 0,
		  (SCM string),
"Convert STRING to uppercase\n")
#define FUNC_NAME s_scm_utf8_toupper
{
	char *str;
	SCM scm;

	SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);

	scm_dynwind_begin(0);

	str = scm_to_locale_string(string);
	/* From here on STR is freed automatically when the context is left. */
	scm_dynwind_free(str);

	if (utf8_toupper(str, strlen(str)))
		scm_misc_error(FUNC_NAME,
			       "cannot convert to upper case: ~A",
			       scm_list_1(string));
	scm = scm_from_locale_string(str);

	scm_dynwind_end();
	return scm;
}
#undef FUNC_NAME

/* Scheme primitive (utf8-tolower STRING).

   Converts STRING to a freshly allocated C string, hands it to
   utf8_tolower() and returns the resulting buffer as a new Scheme string.
   A non-zero return from utf8_tolower() is reported as a misc-error.

   The C copy is registered with the dynwind context, so it is released
   both on normal return and when any of the calls below exits
   non-locally (scm_misc_error never returns).

   NOTE(review): the C string is in the locale encoding while the helper
   presumably expects UTF-8 -- confirm that callers run in a UTF-8 locale. */
SCM_DEFINE_PUBLIC(scm_utf8_tolower, "utf8-tolower", 1, 0, 0,
		  (SCM string),
"Convert STRING to lowercase\n")
#define FUNC_NAME s_scm_utf8_tolower
{
	char *str;
	SCM scm;

	SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);

	scm_dynwind_begin(0);

	str = scm_to_locale_string(string);
	/* From here on STR is freed automatically when the context is left. */
	scm_dynwind_free(str);

	if (utf8_tolower(str, strlen(str)))
		scm_misc_error(FUNC_NAME,
			       "cannot convert to lower case: ~A",
			       scm_list_1(string));
	scm = scm_from_locale_string(str);

	scm_dynwind_end();
	return scm;
}
#undef FUNC_NAME

/* Linear search: return 1 if the value C occurs among the first LEN
   elements of the array W, and 0 otherwise.  W is not dereferenced
   when LEN is 0. */
static int
memberof(unsigned *w, size_t len, unsigned c)
{
	size_t pos;

	for (pos = 0; pos < len; pos++) {
		if (w[pos] == c)
			return 1;
	}
	return 0;
}

/* Scheme primitive (utf8-escape STRING [ESCAPABLE]).

   Returns a new Scheme string in which every character of STRING that
   also occurs in ESCAPABLE is preceded by a backslash.  When ESCAPABLE
   is omitted, the double quote and the backslash are escaped.

   Both arguments are type-checked before anything is allocated, and
   every C buffer obtained afterwards is registered with the dynwind
   context.  Hence all of them are released on normal return as well as
   on any non-local exit (scm_misc_error, scm_memory_error and failing
   string conversions never return to this function). */
SCM_DEFINE_PUBLIC(scm_utf8_escape, "utf8-escape", 1, 1, 0,
		  (SCM string, SCM escapable),
"Prefix with \\ each occurrence of ESCAPABLE chars in STRING\n")
#define FUNC_NAME s_scm_utf8_escape
{
	SCM scm;
	unsigned *escptr;
	size_t esclen;
	char *s;
	unsigned *wptr, *nptr;
	size_t wlen;
	size_t incr, outlen, i, j;

	SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
	if (!SCM_UNBNDP(escapable)) {
		SCM_ASSERT(scm_is_string(escapable), escapable,
			   SCM_ARG2, FUNC_NAME);
	}

	scm_dynwind_begin(0);

	/* Decode STRING into an array of wide characters. */
	s = scm_to_locale_string(string);
	scm_dynwind_free(s);
	if (utf8_mbstr_to_wc(s, &wptr, &wlen))
		scm_misc_error(FUNC_NAME,
			       "cannot convert ~A to UTF-8",
			       scm_list_1(string));
	scm_dynwind_free(wptr);

	/* Determine the set of characters to escape. */
	if (SCM_UNBNDP(escapable)) {
		static unsigned default_escapable[] = { '\"', '\\' };

		escptr = default_escapable;
		esclen = sizeof(default_escapable)
			  / sizeof(default_escapable[0]);
	} else {
		s = scm_to_locale_string(escapable);
		scm_dynwind_free(s);
		if (utf8_mbstr_to_wc(s, &escptr, &esclen))
			scm_misc_error(FUNC_NAME,
				       "cannot convert ~A to UTF-8",
				       scm_list_1(escapable));
		scm_dynwind_free(escptr);
	}

	/* First pass: count the backslashes that have to be inserted. */
	incr = 0;
	for (i = 0; i < wlen; i++)
		if (memberof(escptr, esclen, wptr[i]))
			incr++;
	outlen = wlen + incr;

	/* Request at least one element: calloc may return NULL for a
	   zero-sized request, which must not be mistaken for memory
	   exhaustion when STRING is empty. */
	nptr = calloc(outlen ? outlen : 1, sizeof(nptr[0]));
	if (!nptr)
		scm_memory_error(FUNC_NAME);
	scm_dynwind_free(nptr);

	/* Second pass: copy the characters, inserting the escapes. */
	for (i = j = 0; i < wlen; i++) {
		if (memberof(escptr, esclen, wptr[i]))
			nptr[j++] = '\\';
		nptr[j++] = wptr[i];
	}

	/* Encode the result back into a multibyte string. */
	if (utf8_wc_to_mbstr(nptr, outlen, &s))
		scm_misc_error(FUNC_NAME,
			       "cannot convert UTF-8 to Scheme",
			       SCM_EOL);
	scm_dynwind_free(s);
	scm = scm_from_locale_string(s);

	scm_dynwind_end();
	return scm;
}
#undef FUNC_NAME

/* Initialization entry point for this module: expands the contents of
   "utf8scm.x" in place.

   NOTE(review): "utf8scm.x" is presumably the guile-snarf output for this
   file, i.e. the code that registers the primitives declared above with
   SCM_DEFINE_PUBLIC -- confirm against the build rules. */
void
elmorph_utf8scm_init(void)
{
#include "utf8scm.x"
}