diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2012-03-13 00:13:06 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2012-03-13 00:13:06 +0200 |
commit | ae2bd0092262665c2f1794529db139d8f06a4d57 (patch) | |
tree | 21fea6be6618ba1773aa65c06eb4691c6263a9ac | |
parent | fb26217c55644bcce8b1d764e2ea77ddbbda3dba (diff) | |
download | swis-ae2bd0092262665c2f1794529db139d8f06a4d57.tar.gz swis-ae2bd0092262665c2f1794529db139d8f06a4d57.tar.bz2 |
* src/swis.in: Set default backend to "store".
* src/word-split.c: Fix offset counting.
-rw-r--r-- | src/swis.in | 2 |
-rw-r--r-- | src/word-split.c | 62 |
2 files changed, 36 insertions, 28 deletions
diff --git a/src/swis.in b/src/swis.in index 55e2bb5..abc56bf 100644 --- a/src/swis.in +++ b/src/swis.in @@ -192,7 +192,7 @@ if [ -r $SWIS_CONFIG ]; then if test -z "$backend"; then backend=cat else - backend=swis-store + backend=store fi else die 1 "configuration file $SWIS_CONFIG does not exist or is unreadable" diff --git a/src/word-split.c b/src/word-split.c index 24f9b4e..8bff1dd 100644 --- a/src/word-split.c +++ b/src/word-split.c @@ -44,44 +44,55 @@ open_input () } struct obstack stk; +struct header +{ + int visible; + size_t len; +}; + size_t wordcount; void flush_stack () { char *start, *p; - char **wtab; size_t i; size_t offset = 0; - + struct header hdr; + if (!wordcount) return; start = obstack_finish (&stk); - wtab = xcalloc (wordcount, sizeof wtab[0]); - for (i = 0, p = start; i < wordcount; i++, p += strlen (p) + 1) - wtab[i] = p; if (full_text_option) { fputs (">*", output); - for (i = 0; i < wordcount; i++) + for (i = 0, p = start; i < wordcount; i++) { - fputs (wtab[i], output); + memcpy (&hdr, p, sizeof (hdr)); + p += sizeof (hdr); + fwrite (p, hdr.len, 1, output); fputc (' ', output); + p += hdr.len; } fputc ('\n', output); } offset = 0; - for (i = 0; i < wordcount; i++) + for (i = 0, p = start; i < wordcount; i++) { - fprintf (output, "%lu ", (unsigned long) offset); - fputs (wtab[i], output); - fputc ('\n', output); - offset += swis_utf8_strlen (wtab[i]) + (i > 0); + memcpy (&hdr, p, sizeof (hdr)); + p += sizeof (hdr); + if (hdr.visible) + { + fprintf (output, "%lu ", (unsigned long) offset); + fwrite (p, hdr.len, 1, output); + fputc ('\n', output); + } + p += hdr.len; + offset += hdr.len + 1; } - free (wtab); obstack_free (&stk, start); wordcount = 0; } @@ -94,15 +105,13 @@ flush_word (const unsigned *wordbuf) char *wbuf; size_t wblen; size_t wbc; + struct header hdr; - if (!wordbuf) + if (!wordbuf[0]) return; wordlen = swis_mbutf8_strlen (wordbuf); - if (wordlen <= min_length) - return; - wblen = wordlen * 4; wbuf = xmalloc (wblen + 1);
@@ -116,13 +125,15 @@ flush_word (const unsigned *wordbuf) memcpy (wbuf + wbc, r, rc); wbc += rc; } - wbuf[wbc++] = 0; + wbuf[wbc] = 0; + + hdr.visible = !(wordlen <= min_length || + badword_p (wordbuf) || excluded_word_p (wbuf)); + hdr.len = wbc; + obstack_grow (&stk, &hdr, sizeof (hdr)); + obstack_grow (&stk, wbuf, wbc); + wordcount++; - if (!excluded_word_p (wbuf)) - { - obstack_grow (&stk, wbuf, wbc); - wordcount++; - } free (wbuf); } @@ -154,10 +165,7 @@ word_split () int after_newline = 1; while ((w = fget_word (input, skip_tag, &after_newline))) - { - if (!badword_p (w)) - flush_word (w); - } + flush_word (w); flush_stack (); return !open_input (); } |