summaryrefslogtreecommitdiffabout
authorSergey Poznyakoff <gray@gnu.org.ua>2016-11-16 13:59:16 (GMT)
committer Sergey Poznyakoff <gray@gnu.org.ua>2016-11-17 14:58:50 (GMT)
commit7e8606f27404fe8da3051f90eadef4076f2f7a08 (patch) (side-by-side diff)
tree1c59c67ed827eda92e32dba2955b9840cbdd4774
parentf55d72b20824261b5ee2fa6a87573b3f6ed2863a (diff)
downloadidest-7e8606f27404fe8da3051f90eadef4076f2f7a08.tar.gz
idest-7e8606f27404fe8da3051f90eadef4076f2f7a08.tar.bz2
Keep textual data in UTF-8 internally; provide new options for recoding
* libid3tag/tag.c (id3_tag_new): Use conservative default options. * configure.ac: Check for iconv * src/utf8conv.c: New file. * src/Makefile.am (idest_SOURCES): Add new source file. (LDADD): Link with iconv libraries. * src/backup.c (copy_file): Initialize fsize properly. * src/cmdline.opt: New options: --fixup, --broken-8bit-charset, --encoding * src/guile.c: Remove unused functions. * src/frametab.gperf: Use textual_frame_encoding. * src/idest.h: Include locale.h (latin1_option): Remove. (charset,broken_8bit_charset) (textual_frame_encoding,fixup_option): New globals. (idest_conv_mode): New enum (utf8_convert): New proto. * src/idop.c (idest_ucs4_cvt): Handle broken_8bit_charset option. (set_tags): Fix-up frames if so requested. * src/main.c (latin1_option): Remove. (textual_frame_encoding,fixup_option): New globals (main): Set locale. * bootstrap: Update. * gnulib: Update. * gnulib.modules: Add localcharset * doc/html.init: Minor change. * doc/idest.texi: Update docs.
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r--.gitignore1
-rw-r--r--NEWS29
-rwxr-xr-xbootstrap50
-rw-r--r--configure.ac12
-rw-r--r--doc/html.init3
-rw-r--r--doc/idest.texi150
m---------gnulib0
-rw-r--r--gnulib.modules1
-rw-r--r--libid3tag/tag.c4
-rw-r--r--libid3tag/utf16.c1
-rw-r--r--src/Makefile.am8
-rw-r--r--src/backup.c5
-rw-r--r--src/cmdline.opt41
-rw-r--r--src/frametab.gperf113
-rw-r--r--src/guile.c101
-rw-r--r--src/idest.h18
-rw-r--r--src/idop.c132
-rw-r--r--src/main.c7
-rw-r--r--src/utf8conv.c93
19 files changed, 479 insertions, 290 deletions
diff --git a/.gitignore b/.gitignore
index 7b25544..2b77ddb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+/build-aux
*.a
*.o
*.tar.*
diff --git a/NEWS b/NEWS
index b01b065..11c88f9 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,5 @@
-IdEst -- history of user-visible changes. 2015-11-06
-Copyright (C) 2009-2011, 2015 Sergey Poznyakoff
+IdEst -- history of user-visible changes. 2016-11-17
+Copyright (C) 2009-2011, 2015, 2016 Sergey Poznyakoff
See the end of file for copying conditions.
Please send bug reports to <bug-idest@gnu.org.ua>
@@ -7,6 +7,29 @@ Please send bug reports to <bug-idest@gnu.org.ua>
Version 2.0.90 (Git)
+* Convert textual frames to/from the currently used character set
+
+The character set is deduced from the locale settings. It can also be
+set explicitly using the --charset option.
+
+* New option --broken-8bit-charset
+
+Use this option for files where textual frames are stored as
+ISO-8859-1, but are actually using another 8-bit encoding. The
+argument to this option is the name of the encoding actually used.
+
+In query mode, this option helps display such frames properly.
+
+Use it with the --fixup option to fix such frames in the file.
+
+* New option --encoding
+
+Specifies encoding to store textual frames in ID3 tags.
+
+* New option --fixup
+
+Modifies the ID3 v2 tag so that it can be understood by most devices.
+
* The --convert option can be used to remove unnecessary ID3 formats.
For example, if the file input.mp3 contains both version 1 and 2 tags,
@@ -246,7 +269,7 @@ Initial release.
=========================================================================
Copyright information:
-Copyright (C) 2009-2011, 2015 Sergey Poznyakoff
+Copyright (C) 2009-2011, 2015, 2016 Sergey Poznyakoff
Permission is granted to anyone to make or distribute verbatim copies
of this document as received, in any medium, provided that the
diff --git a/bootstrap b/bootstrap
index 08a7221..5d3c289 100755
--- a/bootstrap
+++ b/bootstrap
@@ -1,10 +1,10 @@
#! /bin/sh
# Print a version string.
-scriptversion=2014-12-08.12; # UTC
+scriptversion=2016-11-03.18; # UTC
# Bootstrap this package from checked-out sources.
-# Copyright (C) 2003-2015 Free Software Foundation, Inc.
+# Copyright (C) 2003-2016 Free Software Foundation, Inc.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -42,7 +42,7 @@ export LC_ALL
local_gl_dir=gl
-# Honour $PERL, but work even if there is none
+# Honor $PERL, but work even if there is none.
PERL="${PERL-perl}"
me=$0
@@ -418,28 +418,30 @@ sort_ver() { # sort -V is not generally available
done
}
-get_version() {
- app=$1
+get_version_sed='
+# Move version to start of line.
+s/.*[v ]\([0-9]\)/\1/
- $app --version >/dev/null 2>&1 || { $app --version; return 1; }
+# Skip lines that do not start with version.
+/^[0-9]/!d
- $app --version 2>&1 |
- sed -n '# Move version to start of line.
- s/.*[v ]\([0-9]\)/\1/
+# Remove characters after the version.
+s/[^.a-z0-9-].*//
- # Skip lines that do not start with version.
- /^[0-9]/!d
+# The first component must be digits only.
+s/^\([0-9]*\)[a-z-].*/\1/
- # Remove characters after the version.
- s/[^.a-z0-9-].*//
+#the following essentially does s/5.005/5.5/
+s/\.0*\([1-9]\)/.\1/g
+p
+q'
- # The first component must be digits only.
- s/^\([0-9]*\)[a-z-].*/\1/
+get_version() {
+ app=$1
- #the following essentially does s/5.005/5.5/
- s/\.0*\([1-9]\)/.\1/g
- p
- q'
+ $app --version >/dev/null 2>&1 || { $app --version; return 1; }
+
+ $app --version 2>&1 | sed -n "$get_version_sed"
}
check_versions() {
@@ -942,6 +944,14 @@ find "$m4_base" "$source_base" \
# such as ylwrap.
AUTORECONFFLAGS="--verbose --install --force -I $m4_base $ACLOCAL_FLAGS"
+# Some systems (RHEL 5) are using ancient autotools, for which the
+# --no-recursive option had not been invented. Detect that lack and
+# omit the option when it's not supported. FIXME in 2017: remove this
+# hack when RHEL 5 autotools are updated, or when they become irrelevant.
+case $($AUTORECONF --help) in
+ *--no-recursive*) AUTORECONFFLAGS="$AUTORECONFFLAGS --no-recursive";;
+esac
+
# Tell autoreconf not to invoke autopoint or libtoolize; they were run above.
echo "running: AUTOPOINT=true LIBTOOLIZE=true $AUTORECONF $AUTORECONFFLAGS"
AUTOPOINT=true LIBTOOLIZE=true $AUTORECONF $AUTORECONFFLAGS \
@@ -1013,6 +1023,6 @@ echo "$0: done. Now you can run './configure'."
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
-# time-stamp-time-zone: "UTC"
+# time-stamp-time-zone: "UTC0"
# time-stamp-end: "; # UTC"
# End:
diff --git a/configure.ac b/configure.ac
index 163f1ee..d590639 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,6 +1,6 @@
# -*- Autoconf -*-
# This file is part of Idest.
-# Copyright (C) 2009-2011, 2015 Sergey Poznyakoff
+# Copyright (C) 2009-2011, 2015, 2016 Sergey Poznyakoff
#
# Idest is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -29,6 +29,16 @@ AC_PROG_CC
gl_EARLY
# Checks for libraries.
+AM_ICONV
+AC_CHECK_HEADERS(iconv.h)
+AC_CHECK_TYPE(iconv_t,:,
+ AC_DEFINE(iconv_t, int,
+ [Conversion descriptor type]),
+ [
+#ifdef HAVE_ICONV_H
+# include <iconv.h>
+#endif
+])
# Checks for header files.
AC_HEADER_STDC
diff --git a/doc/html.init b/doc/html.init
index 5157448..0cdd27d 100644
--- a/doc/html.init
+++ b/doc/html.init
@@ -1,5 +1,5 @@
# Texi2any configuration for idest documentation. -*- perl -*-
-# Copyright (C) 2009-2011, 2015 Sergey Poznyakoff
+# Copyright (C) 2009-2011, 2015, 2016 Sergey Poznyakoff
#
# Idest is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -149,6 +149,7 @@ my @gray_buttons = (\&gray_document_title, ' ',
'FastBack', ' ', 'Up', ' ', 'FastForward', ' ',
'Contents', 'Index' );
+set_from_init_file('TOP_BUTTONS', undef);
set_from_init_file('SECTION_BUTTONS', \@gray_buttons);
set_from_init_file('CHAPTER_BUTTONS', \@gray_buttons);
set_from_init_file('NODE_FOOTER_BUTTONS', \@gray_buttons);
diff --git a/doc/idest.texi b/doc/idest.texi
index ae725f1..2b65ab7 100644
--- a/doc/idest.texi
+++ b/doc/idest.texi
@@ -32,7 +32,7 @@
@end ifinfo
@copying
-Copyright @copyright{} 2009-2011, 2015 Sergey Poznyakoff
+Copyright @copyright{} 2009-2011, 2015, 2016 Sergey Poznyakoff
Permission is granted to copy, distribute and/or modify this document
under the terms of the GNU Free Documentation License, Version 1.3 or
@@ -353,37 +353,6 @@ surrounding white space. If the short option form (@option{-q}) is
used, the list must follow the option letter, with no white space in
between.
-@xopindex{latin1, introduced}
-@cindex UTF-8
-@cindex ISO-8859-1
-All textual fields are displayed in their original encoding. For ID3
-version 2 it is usually UTF-8. The @option{--latin1} option may be
-used to display them in ISO-8859-1, e.g.:
-
-@example
-$ idest --latin1 track06.mp3
-title: D@'ona'm sa m@`a
-album: Torna aviat
-track: 6
-comment:
-artist: Llu@'is Llach
-year: 1991
-genre:
-@end example
-
-Of course, @option{--latin1} and @option{--filter} (or
-@option{--query}) can be used together:
-
-@example
-$ idest --latin1 --filter=title,artist,album track06.mp3
-title: D@'ona'm sa m@`a
-artist: Llu@'is Llach
-album: Torna aviat
-@end example
-
-Future versions of @command{idest} will provide more sophisticated
-recoding facilities.
-
@anchor{all-frames query}
@anchor{fully qualified comment}
@xopindex{all, introduced}
@@ -404,6 +373,40 @@ year: 2009
genre:
@end example
+@xopindex{charset, introduced}
+@cindex charset
+@cindex character set
+All textual fields are displayed using the current locale settings.
+Sometimes it may be necessary to force displaying them in another
+character set. The @option{--charset} option allows you to do so. Its
+argument is a valid character set name. For example:
+
+@example
+$ idest --charset=iso-8859-2 track06.mp3
+@end example
+
+This will cause all textual tags to be converted to iso-8859-2 on
+output. Notice that such conversion is not always possible, for
+example if the tag is stored internally in UTF-8 and is using
+characters not present in the iso-8859-2 character set.
+
+@anchor{broken 8bit encoding}
+@cindex 8-bit encodings
+@cindex broken 8-bit encodings
+@xopindex{broken-8bit-charset, introduced}
+You may occasionally encounter files with textual frames stored as
+iso-8859-1 strings, but actually using another 8-bit encoding. Such
+frames are displayed as sequences of unintelligible characters. You
+can display them properly if you know or can guess the actual
+character set they were written in. To do so, use the
+@option{--broken-8bit-charset} option. For example, the following
+command will assume all textual frames use the iso-8859-2 character
+set and will convert them to the output character set:
+
+@example
+$ idest --broken-8bit-charset=iso-8859-2 dm.mp3
+@end example
+
@node Modify
@chapter Modifying Existing Tags
@@ -450,12 +453,43 @@ being set without qualifiers, all existing frames of this type will be
removed and replaced with the new instance. Its qualifiers will be set to
default values.
-@xopindex{latin1, using with --set}
-Textual fields will be saved in the UTF-8 encoding. To store them
-in ISO-8859-1, use the @option{--latin1} option:
+@xopindex{charset, using with --set}
+Textual strings are assumed to be written in the current locale. If
+that's not the case, use the @option{--charset} option, e.g.
+
+@example
+$ idest --charset=latin1 --set artist='Llu@'is Llach' *.mp3
+@end example
+
+Textual strings are stored in UTF-8 by default. If you prefer another
+encoding, specify it with the @option{--encoding} option. The ID3
+specification offers the following encodings: @samp{iso-8859-1} (or
+@samp{latin1}), @samp{utf-8}, @samp{utf-16}, and @samp{utf-16be} (the
+suffix stands for ``big-endian''). For example, to store texts in
+@samp{utf-16}:
@example
-$ idest --latin1 --set artist='Llu@'is Llach' *.mp3
+$ idest --encoding=utf-16 --set album='Itaca' *.mp3
+@end example
+
+@anchor{fixup}
+@xopindex{fixup, introduced}
+@cindex fixup
+@cindex frame fixup
+Not all devices support the full ID3 specification. Most of them support
+only a subset of it. The @option{--fixup} command line option is
+provided to convert ID3 tags to a form understood by most devices.
+The usage is simple:
+
+@example
+$ idest --fixup *.mp3
+@end example
+
+If the input tags also contain malformed 8-bit encodings
+(@pxref{broken 8bit encoding}), you can fix them as shown below:
+
+@example
+$ idest --broken-8bit-charset=iso-8859-1 --fixup *.mp3
@end example
@node Copy
@@ -1744,6 +1778,13 @@ Backup to given directory. @xref{Backups}.
@item --backup-suffix=@var{suf}
Set backup suffix, instead of the default @samp{~}. @xref{Backups}.
+@opsummary{broken-8bit-charset}
+@item --broken-8bit-charset=@var{charset}
+Textual frames are stored as @samp{ISO-8859-1} strings, but are
+actually using the specified 8bit @var{charset}. Use this option to
+properly convert such frames (@pxref{broken 8bit encoding}), or to fix
+them (@pxref{fixup, fixing broken 8bit strings}).
+
@opsummary{convert}
@item -C @var{version}
@itemx --convert=@var{version}
@@ -1757,6 +1798,15 @@ numbers.
@item --copy=@var{file}
Copy tags from @var{file} to destination files. @xref{Copy}.
+@opsummary{charset}
+@item --charset=@var{name}
+In query mode -- convert textual strings to character set @var{name}
+on output.
+
+In modify mode -- input strings are written using character set @var{name}.
+
+By default, the character set is deduced from the locale settings in both cases.
+
@opsummary{delete}
@item -d[@var{flist}]
@itemx --delete[=@var{flist}]
@@ -1770,6 +1820,26 @@ are deleted. @xref{Delete}.
Print verbose frame descriptions instead of short names.
@xref{describe}.
+@opsummary{encoding}
+@item --encoding=@var{name}
+Specifies encoding for storing textual fields in ID3 tags. Valid only
+in modify mode. Valid encoding names are:
+
+@table @asis
+@item iso-8859-1
+@itemx latin1
+
+@item utf-8
+This is the default.
+
+@item utf-16
+UTF-16, little-endian
+
+@item utf-16be
+UTF-16, big-endian
+@end table
+
+
@opsummary{filter}
@item -F @var{flist}
@itemx --filter=@var{flist}
@@ -1779,6 +1849,10 @@ copy mode}), @option{--query} (@pxref{filter--query, filter in query
mode}), @option{--delete} (@pxref{filter--delete, filter in delete
mode}) and @option{--list-frames} (@pxref{filter--list-frames}).
+@opsummary{fixup}
+@item --fixup
+Attempt to fix the ID3 tags so that they are understood by most devices.
+
@opsummary{help}
@item -h
@itemx --help
@@ -1791,9 +1865,7 @@ Show tag structure information. @xref{Structure}.
@opsummary{latin1}
@item --latin1
-Force latin1 output, when used in query mode (@pxref{View,latin1}).
-Store strings in ISO-8859-1 encoding, when used with @option{--set}
-(@pxref{Modify, latin1}).
+Same as @option{--encoding=latin1}.
@opsummary{list-frames}
@item -L
diff --git a/gnulib b/gnulib
-Subproject 36d982f39b683d0266b9c6ff1e01cbfc94bd97f
+Subproject c3b131294aa42b7997cc9b9a0bbb2934aa27fd6
diff --git a/gnulib.modules b/gnulib.modules
index e8f410f..d3c61a4 100644
--- a/gnulib.modules
+++ b/gnulib.modules
@@ -7,6 +7,7 @@ getopt
gitlog-to-changelog
error
linked-list
+localcharset
progname
xalloc
xlist
diff --git a/libid3tag/tag.c b/libid3tag/tag.c
index 50d7cb1..4473bcc 100644
--- a/libid3tag/tag.c
+++ b/libid3tag/tag.c
@@ -1,6 +1,7 @@
/*
* libid3tag - ID3 tag manipulation library
* Copyright (C) 2000-2004 Underbit Technologies, Inc.
+ * Copyright (C) 2015-2016 Sergey Poznyakoff
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -61,8 +62,7 @@ id3_tag_new(void)
tag->flags = 0;
tag->extendedflags = 0;
tag->restrictions = 0;
- tag->options = /* ID3_TAG_OPTION_UNSYNCHRONISATION | */
- ID3_TAG_OPTION_COMPRESSION | ID3_TAG_OPTION_CRC;
+ tag->options = 0;
tag->nframes = 0;
tag->frames = 0;
tag->paddedsize = 0;
diff --git a/libid3tag/utf16.c b/libid3tag/utf16.c
index f30ebc5..04c6b04 100644
--- a/libid3tag/utf16.c
+++ b/libid3tag/utf16.c
@@ -1,6 +1,7 @@
/*
* libid3tag - ID3 tag manipulation library
* Copyright (C) 2000-2004 Underbit Technologies, Inc.
+ * Copyright (C) 2015-2016 Sergey Poznyakoff
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/src/Makefile.am b/src/Makefile.am
index 2c34303..8c4c895 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,5 +1,5 @@
# This file is part of Idest
-# Copyright (C) 2009-2011 Sergey Poznyakoff
+# Copyright (C) 2009-2011, 2016 Sergey Poznyakoff
#
# Idest is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -26,7 +26,8 @@ idest_SOURCES=\
idop.c\
main.c\
cmdline.h\
- slist.c
+ slist.c\
+ utf8conv.c
BUILT_SOURCES=cmdline.h frametab.c
EXTRA_DIST=cmdline.opt getopt.m4
AM_CPPFLAGS = \
@@ -37,7 +38,8 @@ AM_CPPFLAGS = \
-I$(top_builddir)/gnu\
-I$(top_srcdir)/libid3tag\
@GUILE_INCLUDES@
-LDADD=../gnu/libgnu.a ../libid3tag/libid3tag.a -lz @GUILE_LIBS@
+
+LDADD=../gnu/libgnu.a ../libid3tag/libid3tag.a -lz @GUILE_LIBS@ @LIBICONV@
SUFFIXES=.opt .c .h
.opt.h:
diff --git a/src/backup.c b/src/backup.c
index 9c85198..b268fb6 100644
--- a/src/backup.c
+++ b/src/backup.c
@@ -1,5 +1,5 @@
/* This file is part of Idest.
- Copyright (C) 2009-2011 Sergey Poznyakoff
+ Copyright (C) 2009-2011, 2016 Sergey Poznyakoff
Idest is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -137,6 +137,8 @@ copy_file(const char *file, const char *dst_file)
return 1;
}
+ fsize = st.st_size;
+
for (bufsize = fsize;
bufsize > 0 && (buf = malloc(bufsize)) == NULL;
bufsize /= 2);
@@ -144,7 +146,6 @@ copy_file(const char *file, const char *dst_file)
xalloc_die();
rc = 0;
- fsize = st.st_size;
while (fsize > 0) {
size_t rest;
size_t rdbytes;
diff --git a/src/cmdline.opt b/src/cmdline.opt
index 9ed798f..1f414a6 100644
--- a/src/cmdline.opt
+++ b/src/cmdline.opt
@@ -1,5 +1,5 @@
/* This file is part of Idest. -*- c -*-
- Copyright (C) 2009-2011 Sergey Poznyakoff
+ Copyright (C) 2009-2011, 2016 Sergey Poznyakoff
Idest is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -109,6 +109,13 @@ BEGIN
SET_MODE(MODE_LIST);
END
+OPTION(fixup,f,,
+ [<fixup ID3 tags>])
+BEGIN
+ fixup_option = 1;
+ SET_MODE(MODE_MOD);
+END
+
GROUP([<Operation modifiers>])
OPTION(filter,F,FRAME-LIST,
@@ -135,10 +142,38 @@ BEGIN
convert_version = get_version_list(optarg);
END
+OPTION(charset,,CODE,
+ [<use this charset for external representation>])
+BEGIN
+ charset = optarg;
+END
+
+OPTION(broken-8bit-charset,,CODE,
+ [<assume textual frames in this 8bit charset>])
+BEGIN
+ broken_8bit_charset = optarg;
+END
+
+OPTION(encoding,e,NAME,
+ [<set encoding for textual tags>])
+BEGIN
+ if (strcasecmp(optarg, "latin1") == 0
+ || strcasecmp(optarg, "iso-8859-1") == 0)
+ textual_frame_encoding = ID3_FIELD_TEXTENCODING_ISO_8859_1;
+ else if (strcasecmp(optarg, "utf-16") == 0)
+ textual_frame_encoding = ID3_FIELD_TEXTENCODING_UTF_16;
+ else if (strcasecmp(optarg, "utf-16be") == 0)
+ textual_frame_encoding = ID3_FIELD_TEXTENCODING_UTF_16BE;
+ else if (strcasecmp(optarg, "utf-8") == 0)
+ textual_frame_encoding = ID3_FIELD_TEXTENCODING_UTF_8;
+ else
+ error(1, 0, _("unsupported ID3 frame encoding"));
+END
+
OPTION(latin1,,,
- [<force latin1 output>])
+ [<same as --encoding=latin1>])
BEGIN
- latin1_option = 1;
+ charset = "iso-8859-1";
END
OPTION(verbose,v,,
diff --git a/src/frametab.gperf b/src/frametab.gperf
index 80d60f9..7863d08 100644
--- a/src/frametab.gperf
+++ b/src/frametab.gperf
@@ -1,6 +1,6 @@
%{
/* This file is part of Idest.
- Copyright (C) 2009-2011, 2015 Sergey Poznyakoff
+ Copyright (C) 2009-2011, 2015, 2016 Sergey Poznyakoff
Idest is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -88,10 +88,7 @@ frame_field_from_string(struct id3_frame *frame, int n, const char *value)
return IDEST_ERR_NOFIELD;
if (type == ID3_FIELD_TYPE_TEXTENCODING) {
/* Special case */
- if (id3_field_settextencoding(field,
- latin1_option ?
- ID3_FIELD_TEXTENCODING_ISO_8859_1
- : ID3_FIELD_TEXTENCODING_UTF_8))
+ if (id3_field_settextencoding(field, textual_frame_encoding))
rc = IDEST_ERR_SET;
return rc;
}
@@ -117,7 +114,6 @@ frame_field_from_string(struct id3_frame *frame, int n, const char *value)
case ID3_FIELD_TYPE_LATIN1:
case ID3_FIELD_TYPE_LATIN1FULL:
- /* FIXME: Recode */
res = id3_field_setlatin1(field, (id3_latin1_t const *)value);
break;
@@ -127,34 +123,19 @@ frame_field_from_string(struct id3_frame *frame, int n, const char *value)
break;
case ID3_FIELD_TYPE_STRING:
- if (latin1_option)
- ucs4 = id3_latin1_ucs4duplicate(
- (const id3_latin1_t *)value);
- else
- ucs4 = id3_utf8_ucs4duplicate(
- (const id3_utf8_t *)value);
+ ucs4 = id3_utf8_ucs4duplicate((const id3_utf8_t *)value);
res = id3_field_setstring(field, ucs4);
free(ucs4);
break;
case ID3_FIELD_TYPE_STRINGFULL:
- if (latin1_option)
- ucs4 = id3_latin1_ucs4duplicate(
- (const id3_latin1_t *)value);
- else
- ucs4 = id3_utf8_ucs4duplicate(
- (const id3_utf8_t *)value);
+ ucs4 = id3_utf8_ucs4duplicate((const id3_utf8_t *)value);
res = id3_field_setfullstring(field, ucs4);
free(ucs4);
break;
case ID3_FIELD_TYPE_STRINGLIST:
- if (latin1_option)
- ucs4 = id3_latin1_ucs4duplicate(
- (const id3_latin1_t *)value);
- else
- ucs4 = id3_utf8_ucs4duplicate(
- (const id3_utf8_t *)value);
+ ucs4 = id3_utf8_ucs4duplicate((const id3_utf8_t *)value);
res = id3_field_setstrings(field, 1, &ucs4);
free(ucs4);
break;
@@ -200,10 +181,7 @@ frame_field_from_rawdata(struct id3_frame *frame, int n,
return IDEST_ERR_NOFIELD;
if (type == ID3_FIELD_TYPE_TEXTENCODING) {
/* Special case */
- if (id3_field_settextencoding(field,
- latin1_option ?
- ID3_FIELD_TEXTENCODING_ISO_8859_1
- : ID3_FIELD_TEXTENCODING_UTF_8))
+ if (id3_field_settextencoding(field, textual_frame_encoding))
rc = IDEST_ERR_SET;
return rc;
}
@@ -224,7 +202,6 @@ frame_field_from_rawdata(struct id3_frame *frame, int n,
case ID3_FIELD_TYPE_LATIN1:
case ID3_FIELD_TYPE_LATIN1FULL:
- /* FIXME: Recode */
res = id3_field_setlatin1(field, (id3_latin1_t const *)value);
break;
@@ -234,34 +211,19 @@ frame_field_from_rawdata(struct id3_frame *frame, int n,
break;
case ID3_FIELD_TYPE_STRING:
- if (latin1_option)
- ucs4 = id3_latin1_ucs4duplicate(
- (const id3_latin1_t *)value);
- else
- ucs4 = id3_utf8_ucs4duplicate(
- (const id3_utf8_t *)value);
+ ucs4 = id3_utf8_ucs4duplicate((const id3_utf8_t *)value);
res = id3_field_setstring(field, ucs4);
free(ucs4);
break;
case ID3_FIELD_TYPE_STRINGFULL:
- if (latin1_option)
- ucs4 = id3_latin1_ucs4duplicate(
- (const id3_latin1_t *)value);
- else
- ucs4 = id3_utf8_ucs4duplicate(
- (const id3_utf8_t *)value);
+ ucs4 = id3_utf8_ucs4duplicate((const id3_utf8_t *)value);
res = id3_field_setfullstring(field, ucs4);
free(ucs4);
break;
case ID3_FIELD_TYPE_STRINGLIST:
- if (latin1_option)
- ucs4 = id3_latin1_ucs4duplicate(
- (const id3_latin1_t *)value);
- else
- ucs4 = id3_utf8_ucs4duplicate(
- (const id3_utf8_t *)value);
+ ucs4 = id3_utf8_ucs4duplicate((const id3_utf8_t *)value);
res = id3_field_setstrings(field, 1, &ucs4);
free(ucs4);
break;
@@ -340,12 +302,7 @@ comm_encode(struct id3_frame *frame, const struct ed_item *item)
return rc;
field = id3_frame_field(frame, 3);
- if (latin1_option)
- ucs4 = id3_latin1_ucs4duplicate(
- (const id3_latin1_t *) item->value);
- else
- ucs4 = id3_utf8_ucs4duplicate(
- (const id3_utf8_t *) item->value);
+ ucs4 = id3_utf8_ucs4duplicate((const id3_utf8_t *) item->value);
if (id3_field_setfullstring(field, ucs4))
rc = IDEST_ERR_SET;
free(ucs4);
@@ -358,12 +315,11 @@ comm_decode(struct ed_item *item, struct id3_frame const *frame)
int rc;
union id3_field *field;
- if (rc = decode_qv(item, frame, 1, 2))
+ if ((rc = decode_qv(item, frame, 1, 2)))
return rc;
field = id3_frame_field(frame, 3);
if (!field)
return IDEST_ERR_NOFIELD;
- /* FIXME: Recode as necessary */
item->value = field_to_string(field, 0);
return 0;
@@ -374,19 +330,12 @@ text_encode(struct id3_frame *frame, struct ed_item const *item)
{
const char *value = item->value;
union id3_field *field;
- enum id3_field_textencoding encoding =
- (latin1_option ? ID3_FIELD_TEXTENCODING_ISO_8859_1
- : ID3_FIELD_TEXTENCODING_UTF_8);
id3_ucs4_t *ucs4;
-
+
field = id3_frame_field(frame, 0);
- id3_field_settextencoding(field, encoding);
+ id3_field_settextencoding(field, textual_frame_encoding);
- if (latin1_option)
- ucs4 = id3_latin1_ucs4duplicate((const id3_latin1_t *) value);
- else
- ucs4 = id3_utf8_ucs4duplicate((const id3_utf8_t *)value);
-
+ ucs4 = id3_utf8_ucs4duplicate((const id3_utf8_t *)value);
field = id3_frame_field(frame, 1);
id3_field_setstrings(field, 1, &ucs4);
@@ -398,20 +347,18 @@ static int
text_decode(struct ed_item *item, struct id3_frame const *frame)
{
int isgenre = strcmp(frame->id, ID3_FRAME_GENRE) == 0;
- enum id3_field_textencoding encoding;
union id3_field *field;
-
- field = id3_frame_field(frame, 0);
- if (!field)
- return IDEST_ERR_NOFIELD;
- encoding = id3_field_gettextencoding(field);
- if (encoding == -1)
- return IDEST_ERR_BADCONV;
+ char *str;
+ int rc;
+
field = id3_frame_field(frame, 1);
if (!field)
return IDEST_ERR_NOFIELD;
- /* FIXME: Recode as necessary */
- item->value = field_to_string(field, isgenre);
+ str = field_to_string(field, isgenre);
+ rc = utf8_convert(idest_conv_decode, str, &item->value);
+ free(str);
+ if (rc)
+ return IDEST_ERR_BADCONV;
return 0;
}
@@ -447,12 +394,7 @@ txxx_encode(struct id3_frame *frame, const struct ed_item *item)
return rc;
field = id3_frame_field(frame, 2);
- if (latin1_option)
- ucs4 = id3_latin1_ucs4duplicate(
- (const id3_latin1_t *) item->value);
- else
- ucs4 = id3_utf8_ucs4duplicate(
- (const id3_utf8_t *) item->value);
+ ucs4 = id3_utf8_ucs4duplicate((const id3_utf8_t *) item->value);
if (id3_field_setstring(field, ucs4))
rc = IDEST_ERR_SET;
free(ucs4);
@@ -465,12 +407,11 @@ txxx_decode(struct ed_item *item, struct id3_frame const *frame)
int rc;
union id3_field *field;
- if (rc = decode_qv(item, frame, 1, 1))
+ if ((rc = decode_qv(item, frame, 1, 1)))
return rc;
field = id3_frame_field(frame, 2);
if (!field)
return IDEST_ERR_NOFIELD;
- /* FIXME: Recode as necessary */
item->value = field_to_string(field, 0);
return 0;
@@ -509,7 +450,6 @@ url_decode(struct ed_item *item, struct id3_frame const *frame)
field = id3_frame_field(frame, 0);
if (!field)
return IDEST_ERR_NOFIELD;
- /* FIXME: Recode as necessary */
item->value = field_to_string(field, 0);
return 0;
@@ -553,13 +493,12 @@ wxxx_decode(struct ed_item *item, struct id3_frame const *frame)
int rc;
union id3_field *field;
- if (rc = decode_qv(item, frame, 1, 1))
+ if ((rc = decode_qv(item, frame, 1, 1)))
return rc;
field = id3_frame_field(frame, 2);
if (!field)
return IDEST_ERR_NOFIELD;
- /* FIXME: Recode as necessary */
item->value = field_to_string(field, 0);
return 0;
@@ -603,7 +542,7 @@ apic_decode(struct ed_item *item, struct id3_frame const *frame)
int rc;
union id3_field *field;
- if (rc = decode_qv(item, frame, 1, 3))
+ if ((rc = decode_qv(item, frame, 1, 3)))
return rc;
field = id3_frame_field(frame, 4);
diff --git a/src/guile.c b/src/guile.c
index 7fe052f..802e053 100644
--- a/src/guile.c
+++ b/src/guile.c
@@ -1,5 +1,5 @@
/* This file is part of Idest.
- Copyright (C) 2009-2011, 2015 Sergey Poznyakoff
+ Copyright (C) 2009-2011, 2015, 2016 Sergey Poznyakoff
Idest is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -129,102 +129,6 @@ idest_guile_error(const char *subr,
SCM_BOOL_F));
}
-
-static SCM
-stringlist_to_scm(union id3_field *field, int genre)
-{
- unsigned i, nstrings = id3_field_getnstrings(field);
- SCM head = SCM_EOL, tail = SCM_EOL;
-
- for (i = 0; i < nstrings; i++) {
- SCM cell;
- id3_ucs4_t const *ucs4;
- char *str;
-
- ucs4 = id3_field_getstrings(field, i);
- if (!ucs4)
- continue;
- if (genre)
- ucs4 = id3_genre_name(ucs4);
- str = idest_ucs4_cvt(ucs4);
- cell = scm_cons(scm_from_locale_string(str),
- SCM_EOL);
- free(str);
- if (head == SCM_EOL)
- head = cell;
- else
- SCM_SETCDR(tail, cell);
- tail = cell;
- }
- return scm_string_append(head);
-}
-
-static SCM
-field_to_scm(union id3_field *field, int genre)
-{
- id3_ucs4_t const *ucs4;
- char *str;
- SCM ret = SCM_EOL;
-
- if (!field)
- return ret;
- switch (id3_field_type(field)) {
- case ID3_FIELD_TYPE_TEXTENCODING:
- case ID3_FIELD_TYPE_INT8:
- case ID3_FIELD_TYPE_INT16:
- case ID3_FIELD_TYPE_INT24:
- case ID3_FIELD_TYPE_INT32:
- case ID3_FIELD_TYPE_INT32PLUS:
- ret = scm_from_long(field->number.value);
- break;
-
- case ID3_FIELD_TYPE_LATIN1:
- case ID3_FIELD_TYPE_LATIN1FULL:
- /* FIXME */
- ret = scm_from_locale_string((char*)id3_field_getlatin1(field));
- break;
-
- case ID3_FIELD_TYPE_LATIN1LIST:
- /* FIXME */
- break;
-
- case ID3_FIELD_TYPE_STRING:
- ucs4 = id3_field_getstring(field);;
- if (ucs4) {
- str = idest_ucs4_cvt(ucs4);
- ret = scm_from_locale_string(str);
- free(str);
- } else
- ret = SCM_BOOL_F;
- break;
-
- case ID3_FIELD_TYPE_STRINGFULL:
- ucs4 = id3_field_getfullstring(field);
- if (ucs4) {
- str = idest_ucs4_cvt(ucs4);
- ret = scm_from_locale_string(str);
- free(str);
- } else
- ret = SCM_BOOL_F;
- break;
-
- case ID3_FIELD_TYPE_STRINGLIST:
- ret = stringlist_to_scm(field, genre);
- break;
-
- case ID3_FIELD_TYPE_LANGUAGE:
- case ID3_FIELD_TYPE_FRAMEID:
- case ID3_FIELD_TYPE_DATE:
- ret = scm_from_locale_string(field->immediate.value);
- break;
-
- case ID3_FIELD_TYPE_BINARYDATA:
- /* FIXME */
- ret = SCM_EOL;
- }
- return ret;
-}
-
static SCM
frame_dump_to_scm(struct id3_frame *frame)
{
@@ -232,7 +136,7 @@ frame_dump_to_scm(struct id3_frame *frame)
union id3_field *field;
SCM head = SCM_EOL, tail = SCM_EOL;
- for (i = 0; field = id3_frame_field(frame, i); i++) {
+ for (i = 0; (field = id3_frame_field(frame, i)); i++) {
SCM cell;
char *s = field_to_string(field, 0);
@@ -403,7 +307,6 @@ scm_to_tag(SCM scm, struct id3_tag *tag)
for (; !scm_is_null(scm) && scm_is_pair(scm); scm = SCM_CDR(scm)) {
int rc;
int rawdata = 0;
- struct id3_frametype const *frametype;
struct id3_frame *frame;
struct ed_item itm;
char *id;
diff --git a/src/idest.h b/src/idest.h
index 977fdd1..2c10077 100644
--- a/src/idest.h
+++ b/src/idest.h
@@ -1,5 +1,5 @@
/* This file is part of Idest.
- Copyright (C) 2009-2011, 2015 Sergey Poznyakoff
+ Copyright (C) 2009-2011, 2015, 2016 Sergey Poznyakoff
Idest is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -29,6 +29,7 @@
#include <id3tag.h>
#include <frametype.h>
#include <assert.h>
+#include <locale.h>
#define gettext(s) s
#define _(s) s
@@ -67,7 +68,9 @@ struct ed_item {
#define MODE_LIST 4
extern int mode;
-extern int latin1_option;
+extern char const *charset;
+extern char const *broken_8bit_charset;
+extern enum id3_field_textencoding textual_frame_encoding;
extern int describe_option;
extern int verbose_option;
extern int all_frames;
@@ -82,6 +85,7 @@ extern unsigned default_version_option;
extern int guile_debug;
extern char **guile_argv;
extern int no_init_files_option;
+extern int fixup_option;
/* idop.c */
int guess_file_tag_options(struct id3_file *file, int *modified);
@@ -189,4 +193,14 @@ void guile_init(int *pargc, char ***pargv);
int guile_transform(const char *file, struct id3_tag *tag);
int guile_list(const char *file, struct id3_tag *tag);
void guile_add_load_path(const char *arg, int li);
+
+/* utf8conv.c */
+/* Conversion directions understood by utf8_convert().  The values are
+   used as indexes into the iconv descriptor cache in utf8conv.c, hence
+   the explicit numbering.  */
+enum idest_conv_mode {
+	idest_conv_decode = 0,	/* UTF-8 -> required charset */
+	idest_conv_encode = 1,	/* required charset -> UTF-8 */
+	idest_conv_recode = 2	/* broken 8bit charset -> UTF-8 */
+};
+
+int utf8_convert(int mode, char const *input, char **output);
+
diff --git a/src/idop.c b/src/idop.c
index 0ce4c65..063ff78 100644
--- a/src/idop.c
+++ b/src/idop.c
@@ -191,10 +191,17 @@ safe_id3_file_update_and_close(struct id3_file *file)
+/* Return a newly allocated UTF-8 rendition of the UCS-4 string UCS4.
+
+   When broken_8bit_charset is set, the string is first duplicated as
+   Latin-1 and those bytes are recoded from broken_8bit_charset to UTF-8
+   (idest_conv_recode).  If that fails, a diagnostic is printed and the
+   plain UCS-4 -> UTF-8 duplicate is returned instead.
+
+   The result is heap-allocated; presumably the caller releases it with
+   free() -- confirm against callers.  */
char *
idest_ucs4_cvt(id3_ucs4_t const *ucs4)
{
- if (latin1_option)
- return (char*)id3_ucs4_latin1duplicate(ucs4);
- else
- return (char*)id3_ucs4_utf8duplicate(ucs4);
+	if (broken_8bit_charset) {
+		char *tempval = (char*)id3_ucs4_latin1duplicate(ucs4);
+
+		/* utf8_convert() runs strlen() on its input, so a failed
+		   duplication must not be passed on.  */
+		if (tempval) {
+			char *output;
+			int rc = utf8_convert(idest_conv_recode, tempval,
+					      &output);
+			free(tempval);
+			if (rc == 0)
+				return output;
+		}
+		error(0, 0, "can't recode value from %s to %s",
+		      broken_8bit_charset, "utf-8");
+	}
+	return (char*) id3_ucs4_utf8duplicate(ucs4);
}
static void
@@ -419,7 +426,7 @@ find_matching_frame(struct id3_tag *tag, const struct ed_item *item,
frame = id3_tag_findframe(tag, item->id, 0);
else {
int i;
- for (i = 0; frame = id3_tag_findframe(tag, item->id, i); i++) {
+ for (i = 0; (frame = id3_tag_findframe(tag, item->id, i)); i++) {
if (cmp(frame, item) == 0)
break;
}
@@ -473,12 +480,13 @@ copy_source_tags(struct id3_tag *tag)
itm.name = xstrdup(frame->id);
memcpy(itm.id, frame->id, 4);
if (ft->decode(&itm, frame)) {
- error(0, 0, "%s: decoding failed", frame->id);
+ error(0, 0, "%s: decoding to %s failed",
+ frame->id, charset);
continue;
}
rc = update_frame(tag, &itm);
if (rc)
- error(1, 0,
+ error(0, 0,
"cannot set frame %s: %s",
frame->id, idest_strerror(rc));
ed_item_free_content(&itm);
@@ -487,6 +495,79 @@ copy_source_tags(struct id3_tag *tag)
return 1;
}
+/* Apply every item queued in the global input_list to TAG.
+
+   Each item's value is converted to UTF-8 (idest_conv_encode) before it
+   is handed to update_frame().  The conversion is done into a private
+   copy: input_list is shared, and recoding its items in place would run
+   the same value through the converter again the next time this function
+   is called.  If the conversion fails, the value is used as is.
+
+   Returns 1 if at least one item was applied, 0 otherwise.  A failure of
+   update_frame() is reported through error() with exit status 1.  */
+static int
+update_frames(struct id3_tag *tag)
+{
+	gl_list_iterator_t itr;
+	const void *p;
+	int modified = 0;
+
+	itr = gl_list_iterator(input_list);
+	while (gl_list_iterator_next(&itr, &p, NULL)) {
+		const struct ed_item *item = p;
+		struct ed_item recoded;
+		char *newval = NULL;
+		int rc;
+
+		/* Recode value into a shallow copy of the item; only the
+		   value pointer differs and only it is freed below.  */
+		if (item->value
+		    && utf8_convert(idest_conv_encode,
+				    item->value, &newval) == 0) {
+			recoded = *item;
+			recoded.value = newval;
+			item = &recoded;
+		} else
+			newval = NULL;
+
+		rc = update_frame(tag, item);
+		free(newval);
+		if (rc)
+			error(1, 0,
+			      "cannot set frame %s: %s",
+			      item->id, idest_strerror(rc));
+		modified = 1;
+	}
+	gl_list_iterator_free(&itr);
+	return modified;
+}
+
+/* Decode each frame of TAG that has an entry in the frame table and
+   append a copy of the decoded item to the global input_list, creating
+   the list on first use.
+
+   Frames without a table entry are skipped (with a message when
+   verbose_option is set).  Frames that fail to decode are reported and
+   skipped as well.
+
+   NOTE(review): items are appended to the shared input_list and are not
+   removed here; if several files are processed in one run, items
+   collected from an earlier file stay queued -- confirm this is
+   intended.  */
+static void
+collect_text_frames(struct id3_tag *tag)
+{
+	struct id3_frame *frame;
+	unsigned i;
+
+	for (i = 0; (frame = id3_tag_findframe(tag, NULL, i)); i++) {
+		struct ed_item itm;
+		const struct idest_frametab *ft =
+			idest_frame_lookup(frame->id);
+		if (!ft) {
+			if (verbose_option)
+				error(0, 0,
+				      "%s: unsupported text frame",
+				      frame->id);
+			continue;
+		}
+
+		ed_item_zero(&itm);
+		itm.name = xstrdup(frame->id);
+		memcpy(itm.id, frame->id, 4);
+		if (ft->decode(&itm, frame)) {
+			error(0, 0, "%s: decoding to %s failed",
+			      frame->id, charset);
+			/* Release the name duplicated above (and anything
+			   decode stored before failing) instead of leaking
+			   it on this path.  */
+			ed_item_free_content(&itm);
+			continue;
+		}
+		if (!input_list)
+			input_list = ed_list_create();
+		gl_list_add_last(input_list, ed_item_dup(&itm));
+		ed_item_free_content(&itm);
+	}
+}
+
+/* Queue the frames of TAG for rewriting, but only when a broken 8-bit
+   charset has been named; without one there is nothing to repair and
+   TAG is not examined.  */
+static void
+fixup_charset(struct id3_tag *tag)
+{
+	if (broken_8bit_charset)
+		collect_text_frames(tag);
+}
+
void
set_tags(const char *name)
{
@@ -504,25 +585,20 @@ set_tags(const char *name)
if (!tag)
abort(); /* FIXME */
- if (input_list) {
- gl_list_iterator_t itr;
- const void *p;
-
- itr = gl_list_iterator(input_list);
- while (gl_list_iterator_next(&itr, &p, NULL)) {
- const struct ed_item *item = p;
- int rc = update_frame(tag, item);
- if (rc)
- error(1, 0,
- "cannot set frame %s: %s",
- item->id, idest_strerror(rc));
- modified |= 1;
- }
- gl_list_iterator_free(&itr);
- }
-
modified |= copy_source_tags(tag);
+ if (fixup_option) {
+ fixup_charset(tag);
+ id3_tag_options(tag,
+ ID3_TAG_OPTION_UNSYNCHRONISATION
+ | ID3_TAG_OPTION_COMPRESSION
+ | ID3_TAG_OPTION_CRC, 0);
+ modified |= 1;
+ }
+
+ if (input_list)
+ modified |= update_frames(tag);
+
/* FIXME */
modified |= guile_transform(name, tag);
@@ -601,7 +677,8 @@ show_tags(struct id3_tag *tag)
outitm.name = xstrdup(frame->id);
memcpy(outitm.id, frame->id, 4);
if (ft->decode(&outitm, frame)) {
- error(0, 0, "%s: decoding failed", frame->id);
+ error(0, 0, "%s: decoding to %s failed",
+ frame->id, charset);
continue;
}
output_list_append(&outitm, NULL);
@@ -617,7 +694,9 @@ show_tags(struct id3_tag *tag)
memcpy(outitm.id, ref->id, 4);
if (ft->decode(&outitm, frame)) {
error(0, 0,
- "%s: decoding failed", frame->id);
+ "%s: decoding to %s failed",
+ frame->id,
+ charset);
continue;
}
output_list_append(&outitm, ref);
@@ -682,3 +761,4 @@ info_id3(const char *name)
id3_file_struct_iterate(file, prinfo, NULL);
id3_file_close(file);
}
+
diff --git a/src/main.c b/src/main.c
index 65c2ef0..76a43fa 100644
--- a/src/main.c
+++ b/src/main.c
@@ -19,12 +19,13 @@
unsigned version_option = 0;
unsigned default_version_option = 0;
unsigned convert_version;
-int latin1_option = 0;
+enum id3_field_textencoding textual_frame_encoding = ID3_FIELD_TEXTENCODING_UTF_8;
enum backup_type backup_type = no_backups;
char *backup_dir;
int verbose_option = 0;
int describe_option = 0;
int dry_run_option = 0;
+int fixup_option = 0;
int all_frames = 0;
char *source_file;
@@ -156,7 +157,7 @@ void (*id3_mode[])(const char *) = {
query_id3,
set_id3,
del_id3,
- info_id3
+ info_id3,
};
int mode = MODE_QUERY;
@@ -189,6 +190,8 @@ main(int argc, char **argv)
{
struct id3_file *file = NULL;
+ setlocale (LC_ALL, "");
+
set_program_name(argv[0]);
get_options(argc, argv);
diff --git a/src/utf8conv.c b/src/utf8conv.c
new file mode 100644
index 0000000..448f15e
--- a/dev/null
+++ b/src/utf8conv.c
@@ -0,0 +1,93 @@
+/* This file is part of Idest.
+ Copyright (C) 2016 Sergey Poznyakoff
+
+ Idest is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Idest is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Idest. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "idest.h"
+#include <limits.h>
+#include <localcharset.h>
+#ifdef HAVE_ICONV_H
+# include <iconv.h>
+#endif
+
+/* Qualifier placed on the input-buffer pointer handed to iconv();
+   expands to nothing unless the build has already defined it.  */
+#ifndef ICONV_CONST
+# define ICONV_CONST
+#endif
+
+/* Stubs used when iconv is not available: iconv_open always yields
+   (iconv_t) -1, which makes utf8_convert() report failure for every
+   request.
+   NOTE(review): iconv_t itself must still be declared somewhere when
+   <iconv.h> is not included -- confirm for builds without iconv.  */
+#ifndef HAVE_ICONV
+# undef iconv_open
+# define iconv_open(tocode, fromcode) ((iconv_t) -1)
+# undef iconv
+# define iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft) ((size_t) 0)
+# undef iconv_close
+# define iconv_close(cd) 0
+#endif
+
+/* Charset used by the decode and encode modes.  If still NULL when a
+   conversion is first requested, utf8_init() sets it to
+   locale_charset().  */
+char const *charset;
+/* Source charset of the recode mode; NULL when no broken 8-bit charset
+   has been configured.  */
+char const *broken_8bit_charset;
+
+/* Cache of conversion descriptors, indexed by enum idest_conv_mode.
+   (iconv_t) -1 marks a descriptor that has not been opened yet, or whose
+   last iconv_open attempt failed.  */
+static iconv_t conv_desc[] = { (iconv_t) -1, (iconv_t) -1, (iconv_t) -1 };
+
+/* Return the iconv descriptor for MODE (an enum idest_conv_mode value),
+   opening and caching it in conv_desc[] on first use.
+
+   As a side effect, charset is set to locale_charset() if it is still
+   NULL.  A descriptor that failed to open stays (iconv_t) -1, so the
+   open is attempted again on the next call.
+
+   Returns (iconv_t) -1 if MODE is out of range, if recoding is requested
+   while broken_8bit_charset is NULL, or if iconv_open fails.  */
+static iconv_t
+utf8_init(int mode)
+{
+	if (!charset)
+		charset = locale_charset();
+
+	/* MODE indexes conv_desc[]; reject anything outside the table.  */
+	if (mode < 0
+	    || (size_t) mode >= sizeof(conv_desc) / sizeof(conv_desc[0]))
+		return (iconv_t) -1;
+
+	if (conv_desc[mode] == (iconv_t) -1) {
+		switch (mode) {
+		case idest_conv_decode:
+			conv_desc[mode] = iconv_open(charset, "UTF-8");
+			break;
+
+		case idest_conv_encode:
+			conv_desc[mode] = iconv_open("UTF-8", charset);
+			break;
+
+		case idest_conv_recode:
+			/* Never hand a NULL charset name to iconv_open.  */
+			if (broken_8bit_charset)
+				conv_desc[mode] =
+					iconv_open("UTF-8",
+						   broken_8bit_charset);
+			break;
+		}
+	}
+	return conv_desc[mode];
+}
+
+/* Convert the NUL-terminated string INPUT according to MODE (an enum
+   idest_conv_mode value) and store a newly allocated, NUL-terminated
+   result in *OUTPUT.
+
+   Returns 0 on success and -1 on failure: INPUT or OUTPUT is NULL, no
+   conversion descriptor is available for MODE, or iconv rejects the
+   input.  On failure *OUTPUT is not modified.  */
+int
+utf8_convert(int mode, char const *input, char **output)
+{
+	char ICONV_CONST *ib;
+	char *bufptr, *ob;
+	size_t inlen;
+	size_t outlen;
+	size_t rc;
+	iconv_t cd;
+
+	if (!input || !output)
+		return -1;
+
+	cd = utf8_init(mode);
+	if (cd == 0) {
+		/* Null descriptor: pass the text through unchanged.
+		   NOTE(review): utf8_init() as written never stores a null
+		   descriptor, so this branch looks unreachable -- confirm.  */
+		*output = xstrdup(input);
+		return 0;
+	} else if (cd == (iconv_t)-1)
+		return -1;
+
+	/* The descriptor is cached and shared between calls; put it back
+	   into its initial state so that a previous (possibly failed)
+	   conversion cannot influence this one.  */
+	(void) iconv(cd, NULL, NULL, NULL, NULL);
+
+	/* The terminating NUL is converted together with the text.  */
+	inlen = strlen(input) + 1;
+	outlen = inlen * MB_LEN_MAX + 1;
+	ob = bufptr = xmalloc(outlen);
+	ib = (char ICONV_CONST *) input;
+	rc = iconv(cd, &ib, &inlen, &ob, &outlen);
+	if (rc == (size_t)-1) {
+		free(bufptr);
+		return -1;
+	}
+
+	/* The extra byte reserved in outlen keeps this store in bounds.  */
+	*ob = 0;
+	*output = bufptr;
+	return 0;
+}

Return to:

Send suggestions and report system problems to the System administrator.