Support for choosing the charset and language used in simple chapter files. Suggestion by Liisachan.

This commit is contained in:
Moritz Bunkus 2003-08-28 09:10:45 +00:00
parent 30831506f4
commit 2d74dde71b
5 changed files with 102 additions and 12 deletions

View File

@ -1,7 +1,10 @@
2003-08-28 Moritz Bunkus <moritz@bunkus.org>
* mkvmerge: Support for choosing the charset and language used in
simple chapter files. Suggestion by Liisachan.
* Rewrote the UTF-8 conversion routines. They should now handle
U+8000 characters correctly.
U+8000 characters correctly. Reported by Liisachan.
* mkvmerge: Real reader: For RV40 the actual dimensions were also
used for the aspect ratio/display dimensions. This has been fixed:

View File

@ -34,6 +34,16 @@ Write to the file '\fIout\fR'.
\fB\-\-title\fR <\fItitle\fR>
Sets the general title for the output file, e.g. the movie name.
.TP
\fB\-\-chapter\-language\fR <\fIlanguage\fR>
Sets the ISO639-2 language code that is written for each chapter entry. Applies
only to simple chapter files. Defaults to "eng". See the section about chapters
below for details.
.TP
\fB\-\-chapter\-charset\fR <\fIcharset\fR>
Sets the charset that is used for the conversion to UTF-8 for simple chapter
files. Defaults to the current system locale. See the section about chapters
below for details.
.TP
\fB\-\-chapters <\fIfile\fR>
Read chapter information from the \fIfile\fR. See the section about chapters
below for details.
@ -575,6 +585,14 @@ into one Matroska \fIChapterAtom\fR. It does not set any
\fIChapterTrackNumber\fR which means that the chapters all apply to all
tracks in the file.
.LP
The charset used in the file is assumed to be the same charset that the
current system's locale returns. If this is not the case then the switch
\fI\-\-chapter\-charset\fR should be used. If the file contains a valid
BOM (byte order mark) then all UTF styles are converted automatically.
In this case \fI\-\-chapter\-charset\fR is simply ignored. You can use
\fBmkvinfo\fR or \fBmkvextract\fR to verify that the chapter names have
been converted properly.
.LP
When splitting files \fBmkvmerge\fR will correctly adjust the chapters as
well. This means that each file only includes the chapter entries that
apply to it, and that the timecodes will be offset to match the new timecodes

View File

@ -127,14 +127,19 @@ static bool probe_simple_chapters(mm_text_io_c *in) {
// CHAPTER01NAME=Hallo Welt
static KaxChapters *parse_simple_chapters(mm_text_io_c *in, int64_t min_tc,
int64_t max_tc, int64_t offset) {
int64_t max_tc, int64_t offset,
const char *language,
const char *charset) {
KaxChapters *chaps;
KaxEditionEntry *edition;
KaxChapterAtom *atom;
KaxChapterDisplay *display;
int64_t start, hour, minute, second, msecs;
string name, line;
int mode, num;
int mode, num, cc_utf8;
bool do_convert;
char *recoded_string;
UTFstring wchar_string;
in->setFilePointer(0);
chaps = new KaxChapters;
@ -144,6 +149,16 @@ static KaxChapters *parse_simple_chapters(mm_text_io_c *in, int64_t min_tc,
edition = NULL;
num = 0;
if (in->get_byte_order() == BO_NONE) {
do_convert = true;
cc_utf8 = utf8_init(charset);
} else
do_convert = false;
if (language == NULL)
language = "eng";
while (in->getline2(line)) {
strip(line);
if (line.length() == 0)
@ -182,14 +197,23 @@ static KaxChapters *parse_simple_chapters(mm_text_io_c *in, int64_t min_tc,
*static_cast<EbmlUInteger *>(&GetChild<KaxChapterUID>(*atom)) =
create_unique_uint32();
*static_cast<EbmlUInteger *>(&GetChild<KaxChapterTimeStart>(*atom)) =
(start - offset) * 1000000;
display = &GetChild<KaxChapterDisplay>(*atom);
if (do_convert) {
recoded_string = to_utf8(cc_utf8, name.c_str());
wchar_string = cstrutf8_to_UTFstring(recoded_string);
safefree(recoded_string);
} else
wchar_string = cstrutf8_to_UTFstring(name.c_str());
*static_cast<EbmlUnicodeString *>
(&GetChild<KaxChapterString>(*display)) =
cstr_to_UTFstring(name.c_str());
(&GetChild<KaxChapterString>(*display)) = wchar_string;
*static_cast<EbmlString *>(&GetChild<KaxChapterLanguage>(*display)) =
"eng";
language;
num++;
}
@ -221,7 +245,8 @@ static KaxChapters *parse_xml_chapters(mm_text_io_c *, int64_t, int64_t,
// }}}
KaxChapters *parse_chapters(const char *file_name, int64_t min_tc,
int64_t max_tc, int64_t offset) {
int64_t max_tc, int64_t offset,
const char *language, const char *charset) {
mm_text_io_c *in;
try {
@ -231,7 +256,8 @@ KaxChapters *parse_chapters(const char *file_name, int64_t min_tc,
}
if (probe_simple_chapters(in))
return parse_simple_chapters(in, min_tc, max_tc, offset);
return parse_simple_chapters(in, min_tc, max_tc, offset, language,
charset);
if (probe_xml_chapters(in))
return parse_xml_chapters(in, min_tc, max_tc, offset);

View File

@ -28,7 +28,9 @@
using namespace libmatroska;
KaxChapters *parse_chapters(const char *file_name, int64_t min_tc = 0,
int64_t max_tc = -1, int64_t offset = 0);
int64_t max_tc = -1, int64_t offset = 0,
const char *language = NULL,
const char *charset = NULL);
void write_chapters_xml(KaxChapters *chapters, FILE *out);
void write_chapters_simple(int &chapter_num, KaxChapters *chapters, FILE *out);

View File

@ -164,6 +164,8 @@ KaxChapters *kax_chapters = NULL;
EbmlVoid *kax_chapters_void = NULL;
char *chapter_file_name = NULL;
char *chapter_language = NULL;
char *chapter_charset = NULL;
string title;
@ -226,6 +228,8 @@ static void usage() {
" -o, --output out Write to the file 'out'.\n"
" --title <title> Title for this output file.\n"
" --chapters <file> Read chapter information from the file.\n"
" --chapter-language <lng> Set the 'language' element in chapter entries."
"\n --chapter-charset <cset> Charset for a simple chapter file.\n"
" --global-tags <file> Read global tags from a XML file.\n"
"\n General output control (still global, advanced options):\n"
" --cluster-length <n[ms]> Put at most n data blocks into each cluster.\n"
@ -1248,16 +1252,52 @@ static void parse_args(int argc, char **argv) {
parse_and_add_tags(next_arg);
i++;
} else if (!strcmp(this_arg, "--chapter-language")) {
if (next_arg == NULL)
mxerror("'--chapter-language' lacks the language.\n");
if (chapter_language != NULL)
mxerror("'--chapter-language' may only be given once in '"
"--chapter-language %s'.\n", next_arg);
if (chapter_file_name != NULL)
mxerror("'--chapter-language' must be given before '--chapters' in "
"'--chapter-language %s'.\n", next_arg);
if (!is_valid_iso639_2_code(next_arg))
mxerror("'%s' is not a valid ISO639-2 language code. Run "
"'mkvmerge --list-languages' for a complete list of language "
"codes.\n", next_arg);
chapter_language = safestrdup(next_arg);
i++;
} else if (!strcmp(this_arg, "--chapter-charset")) {
if (next_arg == NULL)
mxerror("'--chapter-charset' lacks the charset.\n");
if (chapter_charset != NULL)
mxerror("'--chapter-charset' may only be given once in '"
"--chapter-charset %s'.\n", next_arg);
if (chapter_file_name != NULL)
mxerror("'--chapter-charset' must be given before '--chapters' in "
"'--chapter-charset %s'.\n", next_arg);
chapter_charset = safestrdup(next_arg);
i++;
} else if (!strcmp(this_arg, "--chapters")) {
if (next_arg == NULL)
mxerror("'--chapters' lacks the file name.\n");
if (kax_chapters != NULL)
if (chapter_file_name != NULL)
mxerror("Only one chapter file allowed in '%s %s'.\n", this_arg,
next_arg);
chapter_file_name = safestrdup(next_arg);
kax_chapters = parse_chapters(next_arg);
kax_chapters = parse_chapters(next_arg, 0, -1, 0, chapter_language,
chapter_charset);
i++;
} else if (!strcmp(this_arg, "--dump-packets")) {
@ -1782,7 +1822,8 @@ void finish_file() {
start = cluster_helper->get_first_timecode() + offset;
end = cluster_helper->get_max_timecode() + offset;
chapters_here = parse_chapters(chapter_file_name, start, end, offset);
chapters_here = parse_chapters(chapter_file_name, start, end, offset,
chapter_language, chapter_charset);
if (chapters_here != NULL)
kax_chapters_void->ReplaceWith(*chapters_here, *out, true);
delete kax_chapters_void;