about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sergey Poznyakoff <gray@gnu.org.ua> 2007-10-04 07:12:57 +0000
committer: Sergey Poznyakoff <gray@gnu.org.ua> 2007-10-04 07:12:57 +0000
commit: b9623752e7393971d4e3bacd933aac4cf6b5b17c (patch)
tree: 307e618e75f4c8f1cb0ce41a14841f6de561961f
parent: 68064efcb3748bcf9f6261f601b5c535674936ea (diff)
download: swis-b9623752e7393971d4e3bacd933aac4cf6b5b17c.tar.gz
download: swis-b9623752e7393971d4e3bacd933aac4cf6b5b17c.tar.bz2
New option --tag; Some minor fixes.
git-svn-id: file:///svnroot/swis/trunk@11 05ba3e8d-823b-0410-8fb2-de0ee4edb5ba
-rw-r--r-- src/html-strip.l 23
-rw-r--r-- src/word-split.c 25
2 files changed, 41 insertions, 7 deletions
diff --git a/src/html-strip.l b/src/html-strip.l
index b42f618..f337c5d 100644
--- a/src/html-strip.l
+++ b/src/html-strip.l
@@ -25,7 +25,7 @@ size_t idx;
iconv_t cd = INVALID_ICONV_CD;
static void
-parse_content_type()
+parse_content_type ()
{
char *start, *p;
size_t len;
@@ -53,7 +53,7 @@ parse_content_type()
if (*p == ';' || isspace (*p))
{
*p = 0;
- break;
+ break;
}
p++;
}
@@ -166,7 +166,7 @@ WS [ \t]+
"&szlig;" output ("ß");
"&Eth;" output (yytext[1] == 'E' ? "Ð" : "ð");
"&yuml;" output ("ÿ");
- "&#"[0-9]{1,3}";" { /* FIXME */; }
+ "&#"[0-9]{1,5}";" { /* FIXME */; }
"&nbsp;" output (" ");
"&"[^;]*";" output (" ");
. {
@@ -185,12 +185,13 @@ WS [ \t]+
<ELEMENT>{
\"[^\"]*\" ;
'[^']*' ;
- ">" { BEGIN (INITIAL); }
+ ">" { BEGIN (INITIAL); output (" "); }
. ;
}
%%
char **input_file;
+int tag_option;
int
open_input ()
@@ -206,7 +207,11 @@ open_input ()
if (!yyin)
error (1, errno, "cannot open input file %s", name);
}
- return 0;
+ if (tag_option)
+ {
+ fprintf (yyout ? yyout : stdout, "\n> %s\n", name);
+ }
+ return 0;
}
return 1;
}
@@ -235,6 +240,7 @@ struct option options[] = {
{ "help", no_argument, NULL, 'h' },
{ "version", no_argument, NULL, 'v' },
{ "output", required_argument, NULL, 'o' },
+ { "tag", no_argument, NULL, 't' },
{ NULL }
};
@@ -248,6 +254,7 @@ usage ()
printf ("\nOptions are:\n");
printf (" -d, --debug output debugging info\n");
printf (" -o, --output=FILE direct output to FILE instead of stdout\n");
+ printf (" -t, --tag tag each output block with the source file name");
printf ("\n");
printf (" -h, --help print this help list\n");
printf (" -v, --version print program version and exit\n");
@@ -263,7 +270,7 @@ main (int argc, char **argv)
program_name = argv[0];
yy_flex_debug = 0;
- while ((c = getopt_long (argc, argv, "dho:v", options, NULL)) != EOF)
+ while ((c = getopt_long (argc, argv, "dho:tv", options, NULL)) != EOF)
{
switch (c)
{
@@ -284,6 +291,10 @@ main (int argc, char **argv)
if (!yyout)
error (1, errno, "cannot open output file %s", optarg);
break;
+
+ case 't':
+ tag_option = 1;
+ break;
case 'v':
version_etc (stdout, "html-strip", PACKAGE_NAME, VERSION,
diff --git a/src/word-split.c b/src/word-split.c
index 59a05b7..59b11d6 100644
--- a/src/word-split.c
+++ b/src/word-split.c
@@ -39,6 +39,7 @@ usage ()
printf ("\nOptions are:\n");
/* printf (" -d, --debug output debugging info\n"); */
printf (" -o, --output=FILE direct output to FILE instead of stdout\n");
+ printf (" -t, --tag preserve file name tags\n");
printf ("\n");
printf (" -h, --help print this help list\n");
printf (" -v, --version print program version and exit\n");
@@ -49,6 +50,7 @@ usage ()
char **input_file;
FILE *input;
FILE *output;
+int tag_option;
int
open_input ()
@@ -75,10 +77,27 @@ int
word_split ()
{
unsigned wc;
+ int after_newline;
+
while ((wc = fgetc (input)) != EOF)
{
if (wc < 0x80)
{
+ if (tag_option)
+ {
+ if (after_newline && wc == '>')
+ {
+ do
+ fputc (wc, output);
+ while ((wc = fgetc (input)) != EOF && wc != '\n');
+ fputc ('\n', output);
+ after_delim_output = 1;
+ after_newline = 0;
+ continue;
+ }
+ after_newline = wc == '\n';
+ }
+
if (isalnum (wc))
{
fputc (wc, output);
@@ -134,7 +153,7 @@ main (int argc, char **argv)
program_name = argv[0];
- while ((c = getopt_long (argc, argv, "dho:v", options, NULL)) != EOF)
+ while ((c = getopt_long (argc, argv, "dho:tv", options, NULL)) != EOF)
{
switch (c)
{
@@ -156,6 +175,10 @@ main (int argc, char **argv)
if (!output)
error (1, errno, "cannot open output file %s", optarg);
break;
+
+ case 't':
+ tag_option = 1;
+ break;
case 'v':
version_etc (stdout, "word-split", PACKAGE_NAME, VERSION,

Return to:

Send suggestions and report system problems to the System administrator.