aboutsummaryrefslogtreecommitdiff
path: root/tools/wordwrap.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/wordwrap.c')
-rw-r--r--tools/wordwrap.c635
1 files changed, 635 insertions, 0 deletions
diff --git a/tools/wordwrap.c b/tools/wordwrap.c
new file mode 100644
index 0000000..8089fc9
--- /dev/null
+++ b/tools/wordwrap.c
@@ -0,0 +1,635 @@
+/* This file is part of GDBM, the GNU data base manager.
+ Copyright (C) 2011-2023 Free Software Foundation, Inc.
+
+ GDBM is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GDBM is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GDBM. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "autoconf.h"
+#include "gdbmapp.h"
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <wctype.h>
+#include <wchar.h>
+#include <errno.h>
+#include <limits.h>
+#include <termios.h>
+#include <sys/ioctl.h>
+
+#define UNSET ((unsigned)-1)
+#define ISSET(c) (c != UNSET)
+#define DEFAULT_RIGHT_MARGIN 80
+
+struct wordwrap_file
+{
+ int fd; /* Output file descriptor. */
+ unsigned left_margin; /* Left margin. */
+ unsigned right_margin; /* Right margin. */
+ char *buffer; /* Output buffer. */
+ size_t bufsize; /* Size of buffer in bytes. */
+ unsigned offset; /* Offset of the writing point in the buffer */
+ unsigned column; /* Number of screen column, i.e. the (multibyte)
+ character corresponding to the offset. */
+ unsigned last_ws; /* Offset of the beginning of the last whitespace
+ sequence written to the buffer. */
+ unsigned ws_run; /* Number of characters in the whitespace sequence. */
+ unsigned word_start; /* Start of a sequence that should be treated as a
+ single word. */
+ unsigned next_left_margin; /* Left margin to be set after next flush. */
+
+ int indent; /* If 1, reindent next line. */
+ int unibyte; /* 0: Normal operation.
+ 1: multibyte functions disabled for this line. */
+ int err; /* Last errno value associated with this file. */
+};
+
+/*
+ * Reset the file for the next input line.
+ */
+static void
+wordwrap_line_init (WORDWRAP_FILE wf)
+{
+ wf->offset = wf->column = wf->left_margin;
+ wf->last_ws = UNSET;
+ wf->ws_run = 0;
+ wf->unibyte = 0;
+}
+
+/*
+ * Detect the value of the right margin. Use TIOCGWINSZ ioctl, the COLUMNS
+ * environment variable, or the default value, in that order.
+ */
+static unsigned
+detect_right_margin (WORDWRAP_FILE wf)
+{
+ struct winsize ws;
+ unsigned r = 0;
+
+ ws.ws_col = ws.ws_row = 0;
+ if ((ioctl (wf->fd, TIOCGWINSZ, (char *) &ws) < 0) || ws.ws_col == 0)
+ {
+ char *p = getenv ("COLUMNS");
+ if (p)
+ {
+ unsigned long n;
+ char *ep;
+ errno = 0;
+ n = strtoul (p, &ep, 10);
+ if (!(errno || *ep || n > UINT_MAX))
+ r = n;
+ }
+ else
+ r = DEFAULT_RIGHT_MARGIN;
+ }
+ else
+ r = ws.ws_col;
+ return r;
+}
+
+/*
+ * Create a wordwrap file operating on file descriptor FD.
+ * In the contrast to the libc fdopen, the descriptor is dup'ed.
+ * Left margin is set to 0, right margin is auto detected.
+ */
+WORDWRAP_FILE
+wordwrap_fdopen (int fd)
+{
+ struct wordwrap_file *wf;
+ int ec;
+
+ if ((wf = calloc (1, sizeof (*wf))) == NULL)
+ return NULL;
+ if ((wf->fd = dup (fd)) == -1)
+ {
+ ec = errno;
+ free (wf);
+ errno = ec;
+ return NULL;
+ }
+
+ wf->last_ws = UNSET;
+ wf->word_start = UNSET;
+ wf->next_left_margin = UNSET;
+
+ wordwrap_set_right_margin (wf, 0);
+
+ return wf;
+}
+
+/*
+ * Close the descriptor associated with the wordwrap file, and deallocate
+ * the memory.
+ */
+int
+wordwrap_close (WORDWRAP_FILE wf)
+{
+ int rc;
+
+ rc = wordwrap_flush (wf);
+ close (wf->fd);
+ free (wf->buffer);
+ free (wf);
+
+ return rc;
+}
+
+/*
+ * Return true if wordwrap file is at the beginning of line.
+ */
+int
+wordwrap_at_bol (WORDWRAP_FILE wf)
+{
+ return wf->column == wf->left_margin;
+}
+
+/*
+ * Return true if wordwrap file is at the end of line.
+ */
+int
+wordwrap_at_eol (WORDWRAP_FILE wf)
+{
+ return wf->column == wf->right_margin;
+}
+
+/*
+ * Write SIZE bytes from the buffer to the file.
+ * Return the number of bytes written.
+ * Set the file error indicator on error.
+ */
+static ssize_t
+full_write (WORDWRAP_FILE wf, size_t size)
+{
+ ssize_t total = 0;
+
+ while (total < size)
+ {
+ ssize_t n = write (wf->fd, wf->buffer + total, size - total);
+ if (n == -1)
+ {
+ wf->err = errno;
+ break;
+ }
+ if (n == 0)
+ {
+ wf->err = ENOSPC;
+ break;
+ }
+ total += n;
+ }
+ return total;
+}
+
+/*
+ * A fail-safe version of mbrtowc. If the call to mbrtowc, fails,
+ * switches the stream to the unibyte mode.
+ */
+static inline size_t
+safe_mbrtowc (WORDWRAP_FILE wf, wchar_t *wc, const char *s, mbstate_t *ps)
+{
+ if (!wf->unibyte)
+ {
+ size_t n = mbrtowc (wc, s, MB_CUR_MAX, ps);
+ if (n == (size_t) -1 || n == (size_t) -2)
+ wf->unibyte = 1;
+ else
+ return n;
+ }
+ *wc = *(unsigned char *)s;
+ return 1;
+}
+
+/*
+ * Return length of the whitespace prefix in STR.
+ */
+static size_t
+wsprefix (WORDWRAP_FILE wf, char const *str, size_t size)
+{
+ size_t i;
+ mbstate_t mbs;
+ wchar_t wc;
+
+ memset (&mbs, 0, sizeof (mbs));
+ for (i = 0; i < size; )
+ {
+ size_t n = safe_mbrtowc (wf, &wc, &str[i], &mbs);
+
+ if (!iswblank (wc))
+ break;
+
+ i += n;
+ }
+
+ return i;
+}
+
+/*
+ * Rescan N bytes from the current buffer from the current offset.
+ * Update offset, column, and whitespace segment counters.
+ */
+static void
+wordwrap_rescan (WORDWRAP_FILE wf, size_t n)
+{
+ mbstate_t mbs;
+ wchar_t wc;
+
+ wordwrap_line_init (wf);
+
+ memset (&mbs, 0, sizeof (mbs));
+ while (wf->offset < n)
+ {
+ size_t n = safe_mbrtowc (wf, &wc, &wf->buffer[wf->offset], &mbs);
+
+ if (iswblank (wc))
+ {
+ if (ISSET (wf->last_ws) && wf->last_ws + wf->ws_run == wf->offset)
+ wf->ws_run++;
+ else
+ {
+ wf->last_ws = wf->offset;
+ wf->ws_run = 1;
+ }
+ }
+
+ wf->offset += n;
+ wf->column++;
+ }
+}
+
+/*
+ * Flush SIZE bytes from the current buffer to the FD.
+ * Reinitialize WF for the next line.
+ */
+static int
+flush_line (WORDWRAP_FILE wf, size_t size)
+{
+ ssize_t n;
+ size_t len;
+ char c;
+
+ if (ISSET (wf->last_ws) && size == wf->last_ws + wf->ws_run)
+ len = wf->last_ws;
+ else
+ len = size;
+
+ if (len >= wf->left_margin && wf->offset > wf->left_margin)
+ {
+ n = full_write (wf, len);
+ if (n == -1)
+ return -1;
+
+ if (n < len)
+ {
+ //FIXME: this breaks column and ws tracking
+ abort ();
+ }
+ }
+
+ c = '\n';
+ write (wf->fd, &c, 1);
+
+ if (ISSET (wf->next_left_margin))
+ {
+ wf->left_margin = wf->next_left_margin;
+ wf->next_left_margin = UNSET;
+ }
+
+ n = wf->offset - size;
+ if (n > 0)
+ {
+ size_t wsn;
+
+ wsn = wsprefix (wf, wf->buffer + size, n);
+
+ size += wsn;
+ n -= wsn;
+
+ if (n)
+ memmove (wf->buffer + wf->left_margin, wf->buffer + size, n);
+ }
+
+ if (wf->indent)
+ {
+ memset (wf->buffer, ' ', wf->left_margin);
+ wf->indent = 0;
+ }
+ wordwrap_rescan (wf, wf->left_margin + n);
+
+ return 0;
+}
+
+/*
+ * Flush the wordwrap file buffer.
+ */
+int
+wordwrap_flush (WORDWRAP_FILE wf)
+{
+ if (wf->offset > wf->left_margin)
+ return flush_line (wf, wf->offset);
+ return 0;
+}
+
+/*
+ * Return error indicator (last errno value).
+ */
+int
+wordwrap_error (WORDWRAP_FILE wf)
+{
+ return wf->err;
+}
+
+/*
+ * Set left margin value.
+ */
+int
+wordwrap_set_left_margin (WORDWRAP_FILE wf, unsigned left)
+{
+ int bol;
+
+ if (left == wf->left_margin)
+ return 0;
+ else if (left >= wf->right_margin)
+ {
+ wf->err = errno = EINVAL;
+ return -1;
+ }
+
+ bol = wordwrap_at_bol (wf);
+ wf->left_margin = left;
+ wf->indent = 1;
+ if (left < wf->offset)
+ {
+ if (!bol)
+ flush_line (wf, wf->offset);//FIXME: remove trailing ws
+ }
+ else
+ {
+ if (left > wf->offset)
+ memset (wf->buffer + wf->offset, ' ', wf->left_margin - wf->offset);
+ }
+ wordwrap_line_init (wf);
+
+ return 0;
+}
+
+/*
+ * Set delayed left margin value. The new value will take effect after the
+ * current line is flushed.
+ */
+int
+wordwrap_next_left_margin (WORDWRAP_FILE wf, unsigned left)
+{
+ if (left == wf->left_margin)
+ return 0;
+ else if (left >= wf->right_margin)
+ {
+ wf->err = errno = EINVAL;
+ return -1;
+ }
+ wf->next_left_margin = left;
+ wf->indent = 1;
+ return 0;
+}
+
+/*
+ * Set right margin for the file.
+ */
+int
+wordwrap_set_right_margin (WORDWRAP_FILE wf, unsigned right)
+{
+ if (right == 0)
+ right = detect_right_margin (wf);
+
+ if (right == wf->right_margin)
+ return 0;
+ else if (right <= wf->left_margin)
+ {
+ wf->err = errno = EINVAL;
+ return -1;
+ }
+ else
+ {
+ char *p;
+ size_t size;
+
+ if (right < wf->offset)
+ {
+ if (wordwrap_flush (wf))
+ return -1;
+ }
+
+ size = MB_CUR_MAX * (right + 1);
+ p = realloc (wf->buffer, size);
+ if (!p)
+ {
+ wf->err = errno;
+ return -1;
+ }
+
+ wf->buffer = p;
+ wf->bufsize = size;
+ wf->right_margin = right;
+ }
+
+ return 0;
+}
+
+/*
+ * Mark current output position as the word start. The normal whitespace
+ * splitting is disabled, until wordwrap_word_end is called or the current
+ * buffer is flushed, whichever happens first.
+ * The functions wordwrap_word_start () / wordwrap_word_end () mark the
+ * sequence of characters that should not be split on whitespace, such as,
+ * e.g. option name with argument in help output ("-f FILE").
+ */
+void
+wordwrap_word_start (WORDWRAP_FILE wf)
+{
+ wf->word_start = wf->offset;
+}
+
+/*
+ * Disable word marker.
+ */
+void
+wordwrap_word_end (WORDWRAP_FILE wf)
+{
+ wf->word_start = UNSET;
+}
+
+/*
+ * Write LEN bytes from the string STR to the wordwrap file.
+ */
+int
+wordwrap_write (WORDWRAP_FILE wf, char const *str, size_t len)
+{
+ size_t i;
+ wchar_t wc;
+ mbstate_t mbs;
+
+ memset (&mbs, 0, sizeof (mbs));
+ for (i = 0; i < len; )
+ {
+ size_t n = safe_mbrtowc (wf, &wc, &str[i], &mbs);
+
+ if (wf->column + 1 == wf->right_margin || wc == '\n')
+ {
+ size_t len;
+
+ if (ISSET (wf->word_start))
+ {
+ len = wf->word_start;
+ wf->word_start = UNSET;
+ }
+ else if (!iswspace (wc) && ISSET (wf->last_ws))
+ len = wf->last_ws;
+ else
+ len = wf->offset;
+
+ flush_line (wf, len);
+ if (wc == '\n')
+ {
+ i += n;
+ continue;
+ }
+ }
+
+ if (iswblank (wc))
+ {
+ if (wf->offset == wf->left_margin)
+ {
+ /* Skip leading whitespace */
+ i += n;
+ continue;
+ }
+ else if (ISSET (wf->last_ws) && wf->last_ws + wf->ws_run == wf->offset)
+ wf->ws_run++;
+ else
+ {
+ wf->last_ws = wf->offset;
+ wf->ws_run = 1;
+ }
+ }
+
+ memcpy (wf->buffer + wf->offset, str + i, n);
+
+ wf->offset += n;
+ wf->column++;
+
+ i += n;
+ }
+ return 0;
+}
+
+/*
+ * Write a nul-terminated string STR to the file (terminating \0 not
+ * included).
+ */
+int
+wordwrap_puts (WORDWRAP_FILE wf, char const *str)
+{
+ return wordwrap_write (wf, str, strlen (str));
+}
+
+/*
+ * Write a single character to the file.
+ */
+int
+wordwrap_putc (WORDWRAP_FILE wf, int c)
+{
+ char ch = c;
+ return wordwrap_write (wf, &ch, 1);
+}
+
+/*
+ * Insert a paragraph (empty line).
+ */
+int
+wordwrap_para (WORDWRAP_FILE wf)
+{
+ return wordwrap_write (wf, "\n\n", 2);
+}
+
+/*
+ * Format AP according to FMT and write the formatted output to file.
+ */
+int
+wordwrap_vprintf (WORDWRAP_FILE wf, char const *fmt, va_list ap)
+{
+ size_t buflen = 64;
+ char *buf;
+ ssize_t n;
+ int rc;
+
+ buf = malloc (buflen);
+ if (!buf)
+ {
+ wf->err = errno;
+ return -1;
+ }
+
+ for (;;)
+ {
+ va_list aq;
+
+ va_copy (aq, ap);
+ n = vsnprintf (buf, buflen, fmt, aq);
+ va_end (aq);
+
+ if (n < 0 || n >= buflen || !memchr(buf, '\0', n + 1))
+ {
+ char *p;
+
+ if ((size_t) -1 / 3 * 2 <= buflen)
+ {
+ wf->err = ENOMEM;
+ free (buf);
+ return -1;
+ }
+
+ buflen += (buflen + 1) / 2;
+ p = realloc (buf, buflen);
+ if (!p)
+ {
+ wf->err = errno;
+ free (buf);
+ return -1;
+ }
+ buf = p;
+ }
+ else
+ break;
+ }
+
+ rc = wordwrap_write (wf, buf, n);
+ free (buf);
+ return rc;
+}
+
+/*
+ * Format argument list according to FMT and write the formatted output
+ * to file.
+ */
+int
+wordwrap_printf (WORDWRAP_FILE wf, char const *fmt, ...)
+{
+ va_list ap;
+ int rc;
+
+ va_start (ap, fmt);
+ rc = wordwrap_vprintf (wf, fmt, ap);
+ va_end (ap);
+ return rc;
+}
+
+
+

Return to:

Send suggestions and report system problems to the System administrator.