diff options
Diffstat (limited to 'wordsplit.c')
-rw-r--r-- | wordsplit.c | 85 |
1 files changed, 45 insertions, 40 deletions
diff --git a/wordsplit.c b/wordsplit.c index aca63df..9139e85 100644 --- a/wordsplit.c +++ b/wordsplit.c @@ -1,5 +1,5 @@ /* wordsplit - a word splitter - Copyright (C) 2009-2021 Sergey Poznyakoff + Copyright (C) 2009-2023 Sergey Poznyakoff This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -67,6 +67,8 @@ is_name_char (struct wordsplit *wsp, int c) #define to_num(c) \ (ISDIGIT(c) ? c - '0' : (ISXDIGIT(c) ? toupper(c) - 'A' + 10 : 255 )) +static int wsplt_unquote_char (const char *transtab, int c); + #define ALLOC_INIT 128 #define ALLOC_INCR 128 @@ -247,7 +249,16 @@ wordsplit_init0 (struct wordsplit *wsp) wsp->ws_errno = 0; } -char wordsplit_c_escape_tab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v"; +char const *wordsplit_escape[] = { + /* C-style escapes, for quoted strings */ + [WS_ESC_C] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v", + /* C-style escapes, outside of quoted strings */ + [WS_ESC_C_WS] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v \t\t", + /* Escape double-quote and backslash. */ + [WS_ESC_DQ] = "\\\\\"\"", + /* Escape double-quote, backslash, and whitespace. 
*/ + [WS_ESC_DQ_WS] = "\\\\\"\" \t\t" +}; static int wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, @@ -314,21 +325,17 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, if (!wsp->ws_escape[WRDSX_QUOTE]) wsp->ws_escape[WRDSX_QUOTE] = ""; } + else if (wsp->ws_flags & WRDSF_CESCAPES) + { + wsp->ws_escape[WRDSX_WORD] = wordsplit_escape[WS_ESC_C_WS]; + wsp->ws_escape[WRDSX_QUOTE] = wordsplit_escape[WS_ESC_C]; + wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD + | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD; + } else { - if (wsp->ws_flags & WRDSF_CESCAPES) - { - wsp->ws_escape[WRDSX_WORD] = wordsplit_c_escape_tab; - wsp->ws_escape[WRDSX_QUOTE] = wordsplit_c_escape_tab; - wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD - | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD; - } - else - { - wsp->ws_escape[WRDSX_WORD] = ""; - wsp->ws_escape[WRDSX_QUOTE] = "\\\\\"\""; - wsp->ws_options |= WRDSO_BSKEEP_QUOTE; - } + wsp->ws_escape[WRDSX_WORD] = ""; + wsp->ws_escape[WRDSX_QUOTE] = ""; } if (!(wsp->ws_options & WRDSO_PARAMV)) @@ -700,14 +707,8 @@ wsnode_quoteremoval (struct wordsplit *wsp) { const char *str = wsnode_ptr (wsp, p); size_t slen = wsnode_len (p); - int unquote; - - if (wsp->ws_flags & WRDSF_QUOTE) - unquote = !(p->flags & _WSNF_NOEXPAND); - else - unquote = 0; - if (unquote) + if (!(p->flags & _WSNF_NOEXPAND)) { if (!(p->flags & _WSNF_WORD)) { @@ -2303,30 +2304,34 @@ scan_word (struct wordsplit *wsp, size_t start, int consume_all) return _WRDS_OK; } - if (wsp->ws_flags & WRDSF_QUOTE) + if (command[i] == '\\') { - if (command[i] == '\\') + if (i + 1 == len) { - if (++i == len) - break; i++; - continue; + break; } - - if (((wsp->ws_flags & WRDSF_SQUOTE) && command[i] == '\'') || - ((wsp->ws_flags & WRDSF_DQUOTE) && command[i] == '"')) + if (wsplt_unquote_char (wsp->ws_escape[WRDSX_WORD], command[i+1])) { - if (join && wsp->ws_tail) - wsp->ws_tail->flags |= _WSNF_JOIN; - if (wordsplit_add_segm (wsp, start, i, _WSNF_JOIN)) - return 
_WRDS_ERR; - if (scan_qstring (wsp, i, &i)) - return _WRDS_ERR; - start = i + 1; - join = 1; + i += 2; + continue; } } + if ((wsp->ws_flags & WRDSF_QUOTE) && + (((wsp->ws_flags & WRDSF_SQUOTE) && command[i] == '\'') || + ((wsp->ws_flags & WRDSF_DQUOTE) && command[i] == '"'))) + { + if (join && wsp->ws_tail) + wsp->ws_tail->flags |= _WSNF_JOIN; + if (wordsplit_add_segm (wsp, start, i, _WSNF_JOIN)) + return _WRDS_ERR; + if (scan_qstring (wsp, i, &i)) + return _WRDS_ERR; + start = i + 1; + join = 1; + } + if (command[i] == '$') { if ((!(wsp->ws_flags & WRDSF_NOVAR) @@ -2449,13 +2454,13 @@ wsplt_quote_char (const char *transtab, int c) int wordsplit_c_unquote_char (int c) { - return wsplt_unquote_char (wordsplit_c_escape_tab, c); + return wsplt_unquote_char (wordsplit_escape[WS_ESC_C], c); } int wordsplit_c_quote_char (int c) { - return wsplt_quote_char (wordsplit_c_escape_tab, c); + return wsplt_quote_char (wordsplit_escape[WS_ESC_C], c); } void |