diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2007-10-04 07:12:57 +0000 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2007-10-04 07:12:57 +0000 |
commit | b9623752e7393971d4e3bacd933aac4cf6b5b17c (patch) | |
tree | 307e618e75f4c8f1cb0ce41a14841f6de561961f | |
parent | 68064efcb3748bcf9f6261f601b5c535674936ea (diff) | |
download | swis-b9623752e7393971d4e3bacd933aac4cf6b5b17c.tar.gz swis-b9623752e7393971d4e3bacd933aac4cf6b5b17c.tar.bz2 |
New option --tag; Some minor fixes.
git-svn-id: file:///svnroot/swis/trunk@11 05ba3e8d-823b-0410-8fb2-de0ee4edb5ba
-rw-r--r-- | src/html-strip.l | 23 |
-rw-r--r-- | src/word-split.c | 25 |
2 files changed, 41 insertions, 7 deletions
diff --git a/src/html-strip.l b/src/html-strip.l index b42f618..f337c5d 100644 --- a/src/html-strip.l +++ b/src/html-strip.l @@ -25,7 +25,7 @@ size_t idx; iconv_t cd = INVALID_ICONV_CD; static void -parse_content_type() +parse_content_type () { char *start, *p; size_t len; @@ -53,7 +53,7 @@ parse_content_type() if (*p == ';' || isspace (*p)) { *p = 0; - break; + break; } p++; } @@ -166,7 +166,7 @@ WS [ \t]+ "ß" output ("ß"); "&Eth;" output (yytext[1] == 'E' ? "Ð" : "ð"); "ÿ" output ("ÿ"); - "&#"[0-9]{1,3}";" { /* FIXME */; } + "&#"[0-9]{1,5}";" { /* FIXME */; } " " output (" "); "&"[^;]*";" output (" "); . { @@ -185,12 +185,13 @@ WS [ \t]+ <ELEMENT>{ \"[^\"]*\" ; '[^']*' ; - ">" { BEGIN (INITIAL); } + ">" { BEGIN (INITIAL); output (" "); } . ; } %% char **input_file; +int tag_option; int open_input () @@ -206,7 +207,11 @@ open_input () if (!yyin) error (1, errno, "cannot open input file %s", name); } - return 0; + if (tag_option) + { + fprintf (yyout ? yyout : stdout, "\n> %s\n", name); + } + return 0; } return 1; } @@ -235,6 +240,7 @@ struct option options[] = { { "help", no_argument, NULL, 'h' }, { "version", no_argument, NULL, 'v' }, { "output", required_argument, NULL, 'o' }, + { "tag", no_argument, NULL, 't' }, { NULL } }; @@ -248,6 +254,7 @@ usage () printf ("\nOptions are:\n"); printf (" -d, --debug output debugging info\n"); printf (" -o, --output=FILE direct output to FILE instead of stdout\n"); + printf (" -t, --tag tag each output block with the source file name"); printf ("\n"); printf (" -h, --help print this help list\n"); printf (" -v, --version print program version and exit\n"); @@ -263,7 +270,7 @@ main (int argc, char **argv) program_name = argv[0]; yy_flex_debug = 0; - while ((c = getopt_long (argc, argv, "dho:v", options, NULL)) != EOF) + while ((c = getopt_long (argc, argv, "dho:tv", options, NULL)) != EOF) { switch (c) { @@ -284,6 +291,10 @@ main (int argc, char **argv) if (!yyout) error (1, errno, "cannot open output file %s", optarg); break; 
+ + case 't': + tag_option = 1; + break; case 'v': version_etc (stdout, "html-strip", PACKAGE_NAME, VERSION, diff --git a/src/word-split.c b/src/word-split.c index 59a05b7..59b11d6 100644 --- a/src/word-split.c +++ b/src/word-split.c @@ -39,6 +39,7 @@ usage () printf ("\nOptions are:\n"); /* printf (" -d, --debug output debugging info\n"); */ printf (" -o, --output=FILE direct output to FILE instead of stdout\n"); + printf (" -t, --tag preserve file name tags\n"); printf ("\n"); printf (" -h, --help print this help list\n"); printf (" -v, --version print program version and exit\n"); @@ -49,6 +50,7 @@ usage () char **input_file; FILE *input; FILE *output; +int tag_option; int open_input () @@ -75,10 +77,27 @@ int word_split () { unsigned wc; + int after_newline; + while ((wc = fgetc (input)) != EOF) { if (wc < 0x80) { + if (tag_option) + { + if (after_newline && wc == '>') + { + do + fputc (wc, output); + while ((wc = fgetc (input)) != EOF && wc != '\n'); + fputc ('\n', output); + after_delim_output = 1; + after_newline = 0; + continue; + } + after_newline = wc == '\n'; + } + if (isalnum (wc)) { fputc (wc, output); @@ -134,7 +153,7 @@ main (int argc, char **argv) program_name = argv[0]; - while ((c = getopt_long (argc, argv, "dho:v", options, NULL)) != EOF) + while ((c = getopt_long (argc, argv, "dho:tv", options, NULL)) != EOF) { switch (c) { @@ -156,6 +175,10 @@ main (int argc, char **argv) if (!output) error (1, errno, "cannot open output file %s", optarg); break; + + case 't': + tag_option = 1; + break; case 'v': version_etc (stdout, "word-split", PACKAGE_NAME, VERSION, |