summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sergey Poznyakoff <gray@gnu.org.ua> 2017-04-19 15:54:51 +0300
committer: Sergey Poznyakoff <gray@gnu.org.ua> 2017-04-19 15:54:51 +0300
commit: 94d116ca5e9735ba47a755e7cbfb2b429ec5e88e (patch)
tree: 74b688c7b1c042d7747c0320e88cdb64e9bdd8e0
parent: 83d23534b9fedef803af8e06aa9fc19d504d65f3 (diff)
downloadmailutils-94d116ca5e9735ba47a755e7cbfb2b429ec5e88e.tar.gz
mailutils-94d116ca5e9735ba47a755e7cbfb2b429ec5e88e.tar.bz2
Automatically handle native character sets on input to the mail utility.
If the 'mime' variable is set, then mail will provide the missing 'charset' parameter for each Content-Type header whose value begins with 'text/'. Its value will be determined by examining the 'charset' mail variable. If it is set to 'auto' (the default), the character set will be extracted from the value of the LC_ALL environment variable. If LC_ALL is unset, the character set will be deduced from the LANG environment variable. Thus, provided that LC_ALL is set correctly, the following setting in .mailrc is recommended to ensure that mails in native character sets will be processed correctly: 'set charset=auto mime'. In most cases, it can be simplified to just 'set mime'. * NEWS: Update. * doc/texinfo/programs.texi: Update the description of the charset variable. * mail/mail.h (util_get_charset): New proto. * mail/send.c (attach_set_content_type): New function. (attlist_add, add_body): Use attach_set_content_type to set the content_type field. * mail/util.c (util_get_charset): New function. (util_rfc2047_decode): Use util_get_charset.
-rw-r--r--NEWS10
-rw-r--r--doc/texinfo/programs.texi25
-rw-r--r--mail/mail.h1
-rw-r--r--mail/send.c33
-rw-r--r--mail/util.c45
5 files changed, 87 insertions, 27 deletions
diff --git a/NEWS b/NEWS
index 2c738989c..ca1f74315 100644
--- a/NEWS
+++ b/NEWS
@@ -104,6 +104,16 @@ in MIME format. In fact, the '--mime' option is equivalent to
'-E set mime', except that it takes effect after all options are
processed.
+** Character sets
+
+The 'charset' variable controls both input and output operations. On
+input it is used to set the value of the missing 'charset' parameter
+in the 'Content-Type' MIME header, if its value begins with 'text/'.
+This means, in particular, that if this variable is set to its default
+value (charset=auto), the LC_ALL environment variable is correctly
+set, and the 'mime' variable is set, then mail can safely be used to
+send messages in native character sets.
+
** New option --alternative
When used with --attach or --attach-fd options, this option sets the
diff --git a/doc/texinfo/programs.texi b/doc/texinfo/programs.texi
index 52d11d4c9..542945a92 100644
--- a/doc/texinfo/programs.texi
+++ b/doc/texinfo/programs.texi
@@ -4797,12 +4797,25 @@ will fall back to using @acronym{SMTP} envelope.
@*Default: @samp{auto}
@vrindex charset, mail variable
-The value of this variable controls the output character set for the
-header fields encoding using RFC 2047. If the variable is unset, no
-decoding is performed and the fields are printed as they are. If the
-variable is set to @samp{auto}, @command{mail} tries to deduce the
-name of the character set from the value of @code{LC_ALL} environment
-variable. Otherwise, its value is taken as the name of the charset.
+The value of this variable is the character set used for input and
+output operations. If the value is @samp{auto}, @command{mail} will
+try to deduce the name of the character set from the value of
+@samp{LC_ALL} environment variable. If the variable contains the
+character set part (e.g. @samp{nb_NO.utf-8}), it will be used.
+Otherwise, @command{mail} will look up in its built-in database the
+name of the character set for this language/territory combination.  If
+@samp{LC_ALL} is not set, the @samp{LANG} environment variable is
+inspected.
+
+The value of @samp{charset} controls both input and output
+operations. On input, it is used to set the value of the
+@samp{charset} parameter in the @samp{Content-Type} MIME header, if
+its value begins with @samp{text/} and @samp{charset} is not present.
+
+On output, it is used to display values of the header fields encoded
+using RFC 2047. If the variable is unset, no decoding is performed
+and the fields are printed as they are. Otherwise, they are recoded
+to that character set.
@kwindex cmd
@item cmd
diff --git a/mail/mail.h b/mail/mail.h
index e2a4dd7db..38f70177e 100644
--- a/mail/mail.h
+++ b/mail/mail.h
@@ -422,6 +422,7 @@ void util_cache_command (mu_list_t *list, const char *fmt, ...) MU_PRINTFLIKE(2,
void util_run_cached_commands (mu_list_t *list);
const char *util_reply_prefix (void);
void util_rfc2047_decode (char **value);
+char *util_get_charset (void);
void util_mark_read (mu_message_t msg);
diff --git a/mail/send.c b/mail/send.c
index 63d9d3026..7075794da 100644
--- a/mail/send.c
+++ b/mail/send.c
@@ -173,6 +173,25 @@ attlist_new (void)
}
static void
+attach_set_content_type (struct atchinfo *aptr, char const *content_type)
+{
+ char *charset;
+
+ if (!content_type)
+ content_type = "text/plain";
+ if (strncmp (content_type, "text/", 5) == 0
+ && !strstr (content_type, "charset=")
+ && (charset = util_get_charset ()))
+ {
+ mu_asprintf (&aptr->content_type, "%s; charset=%s",
+ content_type, charset);
+ free (charset);
+ }
+ else
+ aptr->content_type = mu_strdup (content_type);
+}
+
+static void
attlist_add (mu_list_t attlist, char *id, char const *encoding,
char const *content_type, char const *content_name,
char const *content_filename,
@@ -184,9 +203,10 @@ attlist_add (mu_list_t attlist, char *id, char const *encoding,
aptr = mu_alloc (sizeof (*aptr));
aptr->id = id ? mu_strdup (id) : id;
- aptr->encoding = mu_strdup (encoding);
- aptr->content_type = mu_strdup (content_type ?
- content_type : "application/octet-stream");
+ aptr->encoding = mu_strdup (encoding);
+ attach_set_content_type (aptr,
+ content_type
+ ? content_type : "application/octet-stream");
aptr->name = content_name ? mu_strdup (content_name) : NULL;
aptr->filename = content_filename ? mu_strdup (content_filename) : NULL;
aptr->source = stream;
@@ -505,15 +525,14 @@ add_body (mu_message_t inmsg, compose_env_t *env)
mu_body_t body;
mu_stream_t str;
struct atchinfo *aptr;
-
+
mu_message_get_body (inmsg, &body);
mu_body_get_streamref (body, &str);
aptr = mu_alloc (sizeof (*aptr));
aptr->id = NULL;
- aptr->encoding = default_encoding ? mu_strdup (default_encoding) : NULL;
- aptr->content_type = mu_strdup (default_content_type ?
- default_content_type : "text/plain");
+ aptr->encoding = default_encoding ? mu_strdup (default_encoding) : NULL;
+ attach_set_content_type (aptr, default_content_type);
aptr->name = NULL;
aptr->filename = NULL;
aptr->source = str;
diff --git a/mail/util.c b/mail/util.c
index f5db6b1b7..652f283c1 100644
--- a/mail/util.c
+++ b/mail/util.c
@@ -1044,31 +1044,50 @@ util_run_cached_commands (mu_list_t *list)
mu_list_destroy (list);
}
-void
-util_rfc2047_decode (char **value)
+char *
+util_get_charset (void)
{
- char *charset = NULL;
- char *tmp;
- int rc;
- struct mu_lc_all lc_all = { .flags = 0 };
+ char *charset;
- if (!*value || mailvar_get (&charset, "charset", mailvar_type_string, 0))
- return;
+ if (mailvar_get (&charset, "charset", mailvar_type_string, 0))
+ return NULL;
if (mu_c_strcasecmp (charset, "auto") == 0)
{
- tmp = getenv ("LC_ALL");
+ struct mu_lc_all lc_all = { .flags = 0 };
+ char *tmp = getenv ("LC_ALL");
if (!tmp)
tmp = getenv ("LANG");
-
+
if (tmp && mu_parse_lc_all (tmp, &lc_all, MU_LC_CSET) == 0)
- charset = lc_all.charset;
+ {
+ charset = mu_strdup (lc_all.charset);
+ mu_lc_all_free (&lc_all);
+ }
+ else
+ charset = NULL;
}
+ else
+ charset = mu_strdup (charset);
+
+ return charset;
+}
+
+void
+util_rfc2047_decode (char **value)
+{
+ char *charset, *tmp;
+ int rc;
+ if (!*value)
+ return;
+ charset = util_get_charset ();
if (!charset)
return;
-
+
rc = mu_rfc2047_decode (charset, *value, &tmp);
+ free (charset);
+
if (rc)
{
if (mailvar_is_true ("verbose"))
@@ -1079,8 +1098,6 @@ util_rfc2047_decode (char **value)
free (*value);
*value = tmp;
}
- if (lc_all.flags)
- mu_lc_all_free (&lc_all);
}
const char *

Return to:

Send suggestions and report system problems to the System administrator.