diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2017-04-19 15:54:51 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2017-04-19 15:54:51 +0300 |
commit | 94d116ca5e9735ba47a755e7cbfb2b429ec5e88e (patch) | |
tree | 74b688c7b1c042d7747c0320e88cdb64e9bdd8e0 | |
parent | 83d23534b9fedef803af8e06aa9fc19d504d65f3 (diff) | |
download | mailutils-94d116ca5e9735ba47a755e7cbfb2b429ec5e88e.tar.gz mailutils-94d116ca5e9735ba47a755e7cbfb2b429ec5e88e.tar.bz2 |
Automatically handle native character sets on input to the mail utility.
If the 'mime' variable is set, then mail will provide the missing 'charset'
parameter for each Content-Type header that begins with 'text/'.
Its value will be determined by examining the 'charset' mail variable.
If it is set to 'auto' (the default), the character set will be extracted
from the value of the LC_ALL environment variable. If LC_ALL is unset, the
character set will be deduced from the LANG environment variable.
Thus, provided that LC_ALL is set correctly, the following setting in
.mailrc is recommended to ensure that mails in native character
sets will be processed correctly:
set charset=auto mime
In most cases, it can be simplified to just 'set mime'.
* NEWS: Update.
* doc/texinfo/programs.texi: Update the description of the charset
variable.
* mail/mail.h (util_get_charset): New proto.
* mail/send.c (attach_set_content_type): New function.
(attlist_add, add_body): Use attach_set_content_type to
set the content_type field.
* mail/util.c (util_get_charset): New function.
(util_rfc2047_decode): Use util_get_charset.
-rw-r--r-- | NEWS | 10 | ||||
-rw-r--r-- | doc/texinfo/programs.texi | 25 | ||||
-rw-r--r-- | mail/mail.h | 1 | ||||
-rw-r--r-- | mail/send.c | 33 | ||||
-rw-r--r-- | mail/util.c | 45 |
5 files changed, 87 insertions, 27 deletions
@@ -104,6 +104,16 @@ in MIME format. In fact, the '--mime' option is equivalent to '-E set mime', except that it takes effect after all options are processed. +** Character sets + +The 'charset' variable controls both input and output operations. On +input it is used to set the value of the missing 'charset' parameter +in the 'Content-Type' MIME header, if its value begins with 'text/'. +This means, in particular, that if this variable is set to its default +value (charset=auto), the LC_ALL environment variable is correctly +set, and the 'mime' variable is set, then mail can safely be used to +send messages in native character sets. + ** New option --alternative When used with --attach or --attach-fd options, this option sets the diff --git a/doc/texinfo/programs.texi b/doc/texinfo/programs.texi index 52d11d4c9..542945a92 100644 --- a/doc/texinfo/programs.texi +++ b/doc/texinfo/programs.texi @@ -4797,12 +4797,25 @@ will fall back to using @acronym{SMTP} envelope. @*Default: @samp{auto} @vrindex charset, mail variable -The value of this variable controls the output character set for the -header fields encoding using RFC 2047. If the variable is unset, no -decoding is performed and the fields are printed as they are. If the -variable is set to @samp{auto}, @command{mail} tries to deduce the -name of the character set from the value of @code{LC_ALL} environment -variable. Otherwise, its value is taken as the name of the charset. +The value of this variable is the character set used for input and +output operations. If the value is @samp{auto}, @command{mail} will +try to deduce the name of the character set from the value of +@samp{LC_ALL} environment variable. If the variable contains the +character set part (e.g. @samp{nb_NO.utf-8}), it will be used. +Otherwise, @command{mail} will look up in its built-in database the +value of the character set for this language/territory combination. If +@samp{LC_ALL} is not set, the @samp{LANG} environment variable is +inspected. 
+ +The value of @samp{charset} controls both input and output +operations. On input, it is used to set the value of the +@samp{charset} parameter in the @samp{Content-Type} MIME header, if +its value begins with @samp{text/} and @samp{charset} is not present. + +On output, it is used to display values of the header fields encoded +using RFC 2047. If the variable is unset, no decoding is performed +and the fields are printed as they are. Otherwise, they are recoded +to that character set. @kwindex cmd @item cmd diff --git a/mail/mail.h b/mail/mail.h index e2a4dd7db..38f70177e 100644 --- a/mail/mail.h +++ b/mail/mail.h @@ -422,6 +422,7 @@ void util_cache_command (mu_list_t *list, const char *fmt, ...) MU_PRINTFLIKE(2, void util_run_cached_commands (mu_list_t *list); const char *util_reply_prefix (void); void util_rfc2047_decode (char **value); +char *util_get_charset (void); void util_mark_read (mu_message_t msg); diff --git a/mail/send.c b/mail/send.c index 63d9d3026..7075794da 100644 --- a/mail/send.c +++ b/mail/send.c @@ -173,6 +173,25 @@ attlist_new (void) } static void +attach_set_content_type (struct atchinfo *aptr, char const *content_type) +{ + char *charset; + + if (!content_type) + content_type = "text/plain"; + if (strncmp (content_type, "text/", 5) == 0 + && !strstr (content_type, "charset=") + && (charset = util_get_charset ())) + { + mu_asprintf (&aptr->content_type, "%s; charset=%s", + content_type, charset); + free (charset); + } + else + aptr->content_type = mu_strdup (content_type); +} + +static void attlist_add (mu_list_t attlist, char *id, char const *encoding, char const *content_type, char const *content_name, char const *content_filename, @@ -184,9 +203,10 @@ attlist_add (mu_list_t attlist, char *id, char const *encoding, aptr = mu_alloc (sizeof (*aptr)); aptr->id = id ? mu_strdup (id) : id; - aptr->encoding = mu_strdup (encoding); - aptr->content_type = mu_strdup (content_type ? 
- content_type : "application/octet-stream"); + aptr->encoding = mu_strdup (encoding); + attach_set_content_type (aptr, + content_type + ? content_type : "application/octet-stream"); aptr->name = content_name ? mu_strdup (content_name) : NULL; aptr->filename = content_filename ? mu_strdup (content_filename) : NULL; aptr->source = stream; @@ -505,15 +525,14 @@ add_body (mu_message_t inmsg, compose_env_t *env) mu_body_t body; mu_stream_t str; struct atchinfo *aptr; - + mu_message_get_body (inmsg, &body); mu_body_get_streamref (body, &str); aptr = mu_alloc (sizeof (*aptr)); aptr->id = NULL; - aptr->encoding = default_encoding ? mu_strdup (default_encoding) : NULL; - aptr->content_type = mu_strdup (default_content_type ? - default_content_type : "text/plain"); + aptr->encoding = default_encoding ? mu_strdup (default_encoding) : NULL; + attach_set_content_type (aptr, default_content_type); aptr->name = NULL; aptr->filename = NULL; aptr->source = str; diff --git a/mail/util.c b/mail/util.c index f5db6b1b7..652f283c1 100644 --- a/mail/util.c +++ b/mail/util.c @@ -1044,31 +1044,50 @@ util_run_cached_commands (mu_list_t *list) mu_list_destroy (list); } -void -util_rfc2047_decode (char **value) +char * +util_get_charset (void) { - char *charset = NULL; - char *tmp; - int rc; - struct mu_lc_all lc_all = { .flags = 0 }; + char *charset; - if (!*value || mailvar_get (&charset, "charset", mailvar_type_string, 0)) - return; + if (mailvar_get (&charset, "charset", mailvar_type_string, 0)) + return NULL; if (mu_c_strcasecmp (charset, "auto") == 0) { - tmp = getenv ("LC_ALL"); + struct mu_lc_all lc_all = { .flags = 0 }; + char *tmp = getenv ("LC_ALL"); if (!tmp) tmp = getenv ("LANG"); - + if (tmp && mu_parse_lc_all (tmp, &lc_all, MU_LC_CSET) == 0) - charset = lc_all.charset; + { + charset = mu_strdup (lc_all.charset); + mu_lc_all_free (&lc_all); + } + else + charset = NULL; } + else + charset = mu_strdup (charset); + + return charset; +} + +void +util_rfc2047_decode (char **value) 
+{ + char *charset, *tmp; + int rc; + if (!*value) + return; + charset = util_get_charset (); if (!charset) return; - + rc = mu_rfc2047_decode (charset, *value, &tmp); + free (charset); + if (rc) { if (mailvar_is_true ("verbose")) @@ -1079,8 +1098,6 @@ util_rfc2047_decode (char **value) free (*value); *value = tmp; } - if (lc_all.flags) - mu_lc_all_free (&lc_all); } const char * |