aboutsummaryrefslogtreecommitdiff
path: root/src/ellinika/elmorph.c
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org.ua>2011-06-04 13:12:10 +0000
committerSergey Poznyakoff <gray@gnu.org.ua>2011-06-04 13:12:10 +0000
commit8d4d5b758ef1f09a4ed39a25a07a09f9c26d0aec (patch)
treeacd3235e1e2cdbff16bfb0d9d8ee09065171968e /src/ellinika/elmorph.c
parentaeb69dcd0e430b5e539c5ca33c73703dad59253d (diff)
downloadellinika-8d4d5b758ef1f09a4ed39a25a07a09f9c26d0aec.tar.gz
ellinika-8d4d5b758ef1f09a4ed39a25a07a09f9c26d0aec.tar.bz2
Add new functions for operations over elstrs.
* src/ellinika/utf8.c (utf8_wc_strnchr) (utf8_wc_strnstr): New functions. * src/ellinika/utf8.h (utf8_wc_strnchr) (utf8_wc_strnstr): New protos. * src/ellinika/elmorph.c (_elstr_alloc): Reuse existing sylmap. (_elstr_slice): New function. (elstr-slice,elstr-slice!,elstr-index): New functions. * src/ellinika/elmorph.scm4 (elstr-trim,elstr-trim!): New functions. git-svn-id: file:///home/puszcza/svnroot/ellinika/trunk@562 941c8c0f-9102-463b-b60b-cd22ce0e6858
Diffstat (limited to 'src/ellinika/elmorph.c')
-rw-r--r--src/ellinika/elmorph.c107
1 files changed, 103 insertions, 4 deletions
diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c
index 88520a7..6ff5f01 100644
--- a/src/ellinika/elmorph.c
+++ b/src/ellinika/elmorph.c
@@ -43,8 +43,11 @@ _elstr_syllabize(struct elstr *elstr)
unsigned i, nsyl = 0, accsyl = 0, accchr = 0;
int dstate = 0;
int acc = 0;
-
- sylmap = scm_gc_malloc(sizeof(sylmap[0])*elstr->len, "syllable map");
+
+ if (!elstr->sylmap)
+ elstr->sylmap = scm_gc_malloc(sizeof(sylmap[0])*elstr->len,
+ "syllable map");
+ sylmap = elstr->sylmap;
for (i = 0; i < elstr->len; i++) {
int nstate;
@@ -66,7 +69,6 @@ _elstr_syllabize(struct elstr *elstr)
sylmap[nsyl++] = i - 1;
else
sylmap[nsyl-1] = i - 1;
- elstr->sylmap = sylmap;
elstr->nsyl = nsyl;
elstr->acc_pos = accchr;
elstr->acc_syl = nsyl - accsyl;
@@ -85,7 +87,7 @@ _elstr_alloc(const char *instr)
elstr = scm_gc_malloc(sizeof(*elstr), "Elstr");
elstr->str = wptr;
elstr->len = wlen;
-
+ elstr->sylmap = NULL;
_elstr_syllabize(elstr);
SCM_RETURN_NEWSMOB(_elstr_tag, elstr);
@@ -540,6 +542,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_char_prop_bitmask, "elstr-char-prop-bitmask",
unsigned num;
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
+ SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
elstr = (struct elstr*) SCM_CDR(el);
num = scm_to_uint(n);
if (num >= elstr->len)
@@ -655,6 +658,102 @@ SCM_DEFINE_PUBLIC(scm_elstr_thema_aoristoy_x, "elstr-thema-aoristoy!", 1, 0, 0,
return _elstr_thema_aoristoy(thema, 1, FUNC_NAME);
}
#undef FUNC_NAME
+
+static SCM
+_elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name)
+{
+ struct elstr *elstr;
+ int num;
+ unsigned len;
+ SCM scm;
+
+ SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
+ SCM_ASSERT(scm_is_integer(l), l, SCM_ARG3, func_name);
+ elstr = (struct elstr*) SCM_CDR(el);
+ num = scm_to_int(n);
+ len = scm_to_uint(l);
+
+ if (num < 0)
+ num += elstr->len;
+ if (num < 0 || num >= elstr->len || num + len > elstr->len)
+ scm_misc_error(func_name,
+ "invalid offset or length",
+ SCM_EOL);
+
+ if (destructive)
+ scm = SCM_UNSPECIFIED;
+ else {
+ scm = _elstr_dup(elstr);
+ elstr = (struct elstr*) SCM_CDR(scm);
+ }
+
+ if (num)
+ memmove(elstr->str, elstr->str + num,
+ sizeof(elstr->str[0]) * len);
+ elstr->len = len;
+ _elstr_syllabize(elstr);
+ return scm;
+}
+
+SCM_DEFINE_PUBLIC(scm_elstr_slice, "elstr-slice",
+ 3, 0, 0,
+ (SCM word, SCM off, SCM len),
+"Extract LEN characters from WORD starting from position OFF\n")
+#define FUNC_NAME s_scm_elstr_slice
+{
+ return _elstr_slice(word, off, len, 0, FUNC_NAME);
+}
+#undef FUNC_NAME
+
+SCM_DEFINE_PUBLIC(scm_elstr_slice_x, "elstr-slice!",
+ 3, 0, 0,
+ (SCM word, SCM off, SCM len),
+"Extract LEN characters from WORD starting from position OFF (destructive)\n")
+#define FUNC_NAME s_scm_elstr_slice_x
+{
+ return _elstr_slice(word, off, len, 1, FUNC_NAME);
+}
+#undef FUNC_NAME
+
+SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index",
+ 2, 0, 0,
+ (SCM word, SCM needle),
+"")
+#define FUNC_NAME s_scm_elstr_index
+{
+ struct elstr *elstr;
+ unsigned *wc, *wtmp = NULL, *p;
+ unsigned wlen;
+
+ SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME);
+ elstr = (struct elstr*) SCM_CDR(word);
+ if (scm_is_elstr(needle)) {
+ struct elstr *ep = (struct elstr*) SCM_CDR(needle);
+ wc = ep->str;
+ wlen = ep->len;
+ } else {
+ SCM scm;
+ char *str;
+
+ SCM_ASSERT(scm_is_string(needle), needle, SCM_ARG2, FUNC_NAME);
+ str = scm_to_locale_string(needle);
+ if (utf8_mbstr_to_wc(str, &wtmp, &wlen)) {
+ free(str);
+ scm_misc_error(FUNC_NAME,
+ "Invalid needle string: ~S",
+ scm_list_1(needle));
+ }
+ free(str);
+ wc = wtmp;
+ }
+ p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len, wc, wlen);
+ free(wtmp);
+ if (p)
+ return scm_from_int(p - elstr->str);
+ return SCM_BOOL_F;
+}
+#undef FUNC_NAME
void

Return to:

Send suggestions and report system problems to the System administrator.