diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2011-06-04 13:12:10 +0000 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2011-06-04 13:12:10 +0000 |
commit | 8d4d5b758ef1f09a4ed39a25a07a09f9c26d0aec (patch) | |
tree | acd3235e1e2cdbff16bfb0d9d8ee09065171968e /src/ellinika/elmorph.c | |
parent | aeb69dcd0e430b5e539c5ca33c73703dad59253d (diff) | |
download | ellinika-8d4d5b758ef1f09a4ed39a25a07a09f9c26d0aec.tar.gz ellinika-8d4d5b758ef1f09a4ed39a25a07a09f9c26d0aec.tar.bz2 |
Add new functions for operations over elstrs.
* src/ellinika/utf8.c (utf8_wc_strnchr)
(utf8_wc_strnstr): New functions.
* src/ellinika/utf8.h (utf8_wc_strnchr)
(utf8_wc_strnstr): New protos.
* src/ellinika/elmorph.c (_elstr_alloc): Reuse existing sylmap.
(_elstr_slice): New function.
(elstr-slice,elstr-slice!,elstr-index): New functions.
* src/ellinika/elmorph.scm4 (elstr-trim,elstr-trim!): New functions.
git-svn-id: file:///home/puszcza/svnroot/ellinika/trunk@562 941c8c0f-9102-463b-b60b-cd22ce0e6858
Diffstat (limited to 'src/ellinika/elmorph.c')
-rw-r--r-- | src/ellinika/elmorph.c | 107 |
1 files changed, 103 insertions, 4 deletions
```diff
diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c
index 88520a7..6ff5f01 100644
--- a/src/ellinika/elmorph.c
+++ b/src/ellinika/elmorph.c
@@ -43,8 +43,11 @@ _elstr_syllabize(struct elstr *elstr)
 	unsigned i, nsyl = 0, accsyl = 0, accchr = 0;
 	int dstate = 0;
 	int acc = 0;
-	
-	sylmap = scm_gc_malloc(sizeof(sylmap[0])*elstr->len, "syllable map");
+
+	if (!elstr->sylmap)
+		elstr->sylmap = scm_gc_malloc(sizeof(sylmap[0])*elstr->len,
+					      "syllable map");
+	sylmap = elstr->sylmap;
 
 	for (i = 0; i < elstr->len; i++) {
 		int nstate;
@@ -66,7 +69,6 @@ _elstr_syllabize(struct elstr *elstr)
 		sylmap[nsyl++] = i - 1;
 	else
 		sylmap[nsyl-1] = i - 1;
-	elstr->sylmap = sylmap;
 	elstr->nsyl = nsyl;
 	elstr->acc_pos = accchr;
 	elstr->acc_syl = nsyl - accsyl;
@@ -85,7 +87,7 @@ _elstr_alloc(const char *instr)
 	elstr = scm_gc_malloc(sizeof(*elstr), "Elstr");
 	elstr->str = wptr;
 	elstr->len = wlen;
-
+	elstr->sylmap = NULL;
 	_elstr_syllabize(elstr);
 
 	SCM_RETURN_NEWSMOB(_elstr_tag, elstr);
@@ -540,6 +542,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_char_prop_bitmask, "elstr-char-prop-bitmask",
 	unsigned num;
 
 	SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
+	SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
 	elstr = (struct elstr*) SCM_CDR(el);
 	num = scm_to_uint(n);
 	if (num >= elstr->len)
@@ -655,6 +658,102 @@ SCM_DEFINE_PUBLIC(scm_elstr_thema_aoristoy_x, "elstr-thema-aoristoy!", 1, 0, 0,
 	return _elstr_thema_aoristoy(thema, 1, FUNC_NAME);
 }
 #undef FUNC_NAME
+
+static SCM
+_elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name)
+{
+	struct elstr *elstr;
+	int num;
+	unsigned len;
+	SCM scm;
+
+	SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+	SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
+	SCM_ASSERT(scm_is_integer(l), l, SCM_ARG3, func_name);
+	elstr = (struct elstr*) SCM_CDR(el);
+	num = scm_to_int(n);
+	len = scm_to_uint(l);
+
+	if (num < 0)
+		num += elstr->len;
+	if (num < 0 || num >= elstr->len || num + len > elstr->len)
+		scm_misc_error(func_name,
+			       "invalid offset or length",
+			       SCM_EOL);
+
+	if (destructive)
+		scm = SCM_UNSPECIFIED;
+	else {
+		scm = _elstr_dup(elstr);
+		elstr = (struct elstr*) SCM_CDR(scm);
+	}
+
+	if (num)
+		memmove(elstr->str, elstr->str + num,
+			sizeof(elstr->str[0]) * len);
+	elstr->len = len;
+	_elstr_syllabize(elstr);
+	return scm;
+}
+
+SCM_DEFINE_PUBLIC(scm_elstr_slice, "elstr-slice",
+		  3, 0, 0,
+		  (SCM word, SCM off, SCM len),
+"Extract LEN characters from WORD starting from position OFF\n")
+#define FUNC_NAME s_scm_elstr_slice
+{
+	return _elstr_slice(word, off, len, 0, FUNC_NAME);
+}
+#undef FUNC_NAME
+
+SCM_DEFINE_PUBLIC(scm_elstr_slice_x, "elstr-slice!",
+		  3, 0, 0,
+		  (SCM word, SCM off, SCM len),
+"Extract LEN characters from WORD starting from position OFF (destructive)\n")
+#define FUNC_NAME s_scm_elstr_slice_x
+{
+	return _elstr_slice(word, off, len, 1, FUNC_NAME);
+}
+#undef FUNC_NAME
+
+SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index",
+		  2, 0, 0,
+		  (SCM word, SCM needle),
+"")
+#define FUNC_NAME s_scm_elstr_index
+{
+	struct elstr *elstr;
+	unsigned *wc, *wtmp = NULL, *p;
+	unsigned wlen;
+
+	SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME);
+	elstr = (struct elstr*) SCM_CDR(word);
+	if (scm_is_elstr(needle)) {
+		struct elstr *ep = (struct elstr*) SCM_CDR(needle);
+		wc = ep->str;
+		wlen = ep->len;
+	} else {
+		SCM scm;
+		char *str;
+
+		SCM_ASSERT(scm_is_string(needle), needle, SCM_ARG2, FUNC_NAME);
+		str = scm_to_locale_string(needle);
+		if (utf8_mbstr_to_wc(str, &wtmp, &wlen)) {
+			free(str);
+			scm_misc_error(FUNC_NAME,
+				       "Invalid needle string: ~S",
+				       scm_list_1(needle));
+		}
+		free(str);
+		wc = wtmp;
+	}
+	p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len, wc, wlen);
+	free(wtmp);
+	if (p)
+		return scm_from_int(p - elstr->str);
+	return SCM_BOOL_F;
+}
+#undef FUNC_NAME
 
 
 void
```