From 2d74dde71b089c70737a642cc9797da21d81c980 Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Thu, 28 Aug 2003 09:10:45 +0000 Subject: [PATCH] Support for choosing the charset and language used in simple chapter files. Suggestion by Liisachan. --- ChangeLog | 5 ++++- doc/mkvmerge.1 | 18 ++++++++++++++++++ src/chapters.cpp | 40 +++++++++++++++++++++++++++++++++------- src/chapters.h | 4 +++- src/mkvmerge.cpp | 47 ++++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 102 insertions(+), 12 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3660cb4d9..1fee18c7f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,10 @@ 2003-08-28 Moritz Bunkus + * mkvmerge: Support for choosing the charset and language used in + simple chapter files. Suggestion by Liisachan. + * Rewrote the UTF-8 conversion routines. They should now handle - U+8000 characters correctly. + U+8000 characters correctly. Reported by Liisachan. * mkvmerge: Real reader: For RV40 the actual dimensions were also used for the aspect ratio/display dimensions. This has been fixed: diff --git a/doc/mkvmerge.1 b/doc/mkvmerge.1 index 212d01d85..08ae27668 100644 --- a/doc/mkvmerge.1 +++ b/doc/mkvmerge.1 @@ -34,6 +34,16 @@ Write to the file '\fIout\fR'. \fB\-\-title\fR <\fItitle\fR> Sets the general title for the output file, e.g. the movie name. .TP +\fB\-\-chapter\-language\fR <\fBlanguage\fR> +Sets the ISO639-2 language code that is written for each chapter entry. Applies +only to simple chapter files. Defaults to "eng". See the section about chapters +below for details. +.TP +\fB\-\-chapter\-charset\fR <\fBcharset\fR> +Sets the charset that is used for the conversion to UTF-8 for simple chapter +files. Defaults to the current system locale. See the section about chapters +below for details. +.TP \fB\-\-chapters <\fIfile\fR> Read chapter information from the \fIfile\fR. See the section about chapters below for details. @@ -575,6 +585,14 @@ into one Matroska \fIChapterAtom\fR. 
It does not set any \fIChapterTrackNumber\fR which means that the chapters all apply to all tracks in the file. .LP +The charset used in the file is assumed to be the same charset that the +current system's locale returns. If this is not the case then the switch +\fI\-\-chapter\-charset\fR should be used. If the file contains a valid +BOM (byte order marker) then all UTF styles are converted automatically. +In this case \fI\-\-chapter\-charset\fR is simply ignored. You can use +\fBmkvinfo\fR or \fBmkvextract\fR to verify that the chapter names have +been converted properly. +.LP When splitting files \fBmkvmerge\fR will correctly adjust the chapters as well. This means that each file only includes the chapter entries that apply to it, and that the timecodes will be offset to match the new timecodes diff --git a/src/chapters.cpp b/src/chapters.cpp index 7bb9b9350..42a52c761 100644 --- a/src/chapters.cpp +++ b/src/chapters.cpp @@ -127,14 +127,19 @@ static bool probe_simple_chapters(mm_text_io_c *in) { // CHAPTER01NAME=Hallo Welt static KaxChapters *parse_simple_chapters(mm_text_io_c *in, int64_t min_tc, - int64_t max_tc, int64_t offset) { + int64_t max_tc, int64_t offset, + const char *language, + const char *charset) { KaxChapters *chaps; KaxEditionEntry *edition; KaxChapterAtom *atom; KaxChapterDisplay *display; int64_t start, hour, minute, second, msecs; string name, line; - int mode, num; + int mode, num, cc_utf8; + bool do_convert; + char *recoded_string; + UTFstring wchar_string; in->setFilePointer(0); chaps = new KaxChapters; @@ -144,6 +149,16 @@ static KaxChapters *parse_simple_chapters(mm_text_io_c *in, int64_t min_tc, edition = NULL; num = 0; + if (in->get_byte_order() == BO_NONE) { + do_convert = true; + cc_utf8 = utf8_init(charset); + + } else + do_convert = false; + + if (language == NULL) + language = "eng"; + while (in->getline2(line)) { strip(line); if (line.length() == 0) @@ -182,14 +197,23 @@ static KaxChapters *parse_simple_chapters(mm_text_io_c *in, 
int64_t min_tc, *static_cast(&GetChild(*atom)) = create_unique_uint32(); + *static_cast(&GetChild(*atom)) = (start - offset) * 1000000; + display = &GetChild(*atom); + + if (do_convert) { + recoded_string = to_utf8(cc_utf8, name.c_str()); + wchar_string = cstrutf8_to_UTFstring(recoded_string); + safefree(recoded_string); + } else + wchar_string = cstrutf8_to_UTFstring(name.c_str()); *static_cast - (&GetChild(*display)) = - cstr_to_UTFstring(name.c_str()); + (&GetChild(*display)) = wchar_string; + *static_cast(&GetChild(*display)) = - "eng"; + language; num++; } @@ -221,7 +245,8 @@ static KaxChapters *parse_xml_chapters(mm_text_io_c *, int64_t, int64_t, // }}} KaxChapters *parse_chapters(const char *file_name, int64_t min_tc, - int64_t max_tc, int64_t offset) { + int64_t max_tc, int64_t offset, + const char *language, const char *charset) { mm_text_io_c *in; try { @@ -231,7 +256,8 @@ KaxChapters *parse_chapters(const char *file_name, int64_t min_tc, } if (probe_simple_chapters(in)) - return parse_simple_chapters(in, min_tc, max_tc, offset); + return parse_simple_chapters(in, min_tc, max_tc, offset, language, + charset); if (probe_xml_chapters(in)) return parse_xml_chapters(in, min_tc, max_tc, offset); diff --git a/src/chapters.h b/src/chapters.h index 392acbacd..d86910535 100644 --- a/src/chapters.h +++ b/src/chapters.h @@ -28,7 +28,9 @@ using namespace libmatroska; KaxChapters *parse_chapters(const char *file_name, int64_t min_tc = 0, - int64_t max_tc = -1, int64_t offset = 0); + int64_t max_tc = -1, int64_t offset = 0, + const char *language = NULL, + const char *charset = NULL); void write_chapters_xml(KaxChapters *chapters, FILE *out); void write_chapters_simple(int &chapter_num, KaxChapters *chapters, FILE *out); diff --git a/src/mkvmerge.cpp b/src/mkvmerge.cpp index 8c5276860..c554a2fe9 100644 --- a/src/mkvmerge.cpp +++ b/src/mkvmerge.cpp @@ -164,6 +164,8 @@ KaxChapters *kax_chapters = NULL; EbmlVoid *kax_chapters_void = NULL; char *chapter_file_name = NULL; 
+char *chapter_language = NULL; +char *chapter_charset = NULL; string title; @@ -226,6 +228,8 @@ static void usage() { " -o, --output out Write to the file 'out'.\n" " --title Title for this output file.\n" " --chapters <file> Read chapter information from the file.\n" + " --chapter-language <lng> Set the 'language' element in chapter entries." + "\n --chapter-charset <cset> Charset for a simple chapter file.\n" " --global-tags <file> Read global tags from a XML file.\n" "\n General output control (still global, advanced options):\n" " --cluster-length <n[ms]> Put at most n data blocks into each cluster.\n" @@ -1248,16 +1252,52 @@ static void parse_args(int argc, char **argv) { parse_and_add_tags(next_arg); i++; + } else if (!strcmp(this_arg, "--chapter-language")) { + if (next_arg == NULL) + mxerror("'--chapter-language' lacks the language.\n"); + + if (chapter_language != NULL) + mxerror("'--chapter-language' may only be given once in '" + "--chapter-language %s'.\n", next_arg); + + if (chapter_file_name != NULL) + mxerror("'--chapter-language' must be given before '--chapters' in " + "'--chapter-language %s'.\n", next_arg); + + if (!is_valid_iso639_2_code(next_arg)) + mxerror("'%s' is not a valid ISO639-2 language code. 
Run " + "'mkvmerge --list-languages' for a complete list of language " + "codes.\n", next_arg); + + chapter_language = safestrdup(next_arg); + i++; + + } else if (!strcmp(this_arg, "--chapter-charset")) { + if (next_arg == NULL) + mxerror("'--chapter-charset' lacks the charset.\n"); + + if (chapter_charset != NULL) + mxerror("'--chapter-charset' may only be given once in '" + "--chapter-charset %s'.\n", next_arg); + + if (chapter_file_name != NULL) + mxerror("'--chapter-charset' must be given before '--chapters' in " + "'--chapter-charset %s'.\n", next_arg); + + chapter_charset = safestrdup(next_arg); + i++; + } else if (!strcmp(this_arg, "--chapters")) { if (next_arg == NULL) mxerror("'--chapters' lacks the file name.\n"); - if (kax_chapters != NULL) + if (chapter_file_name != NULL) mxerror("Only one chapter file allowed in '%s %s'.\n", this_arg, next_arg); chapter_file_name = safestrdup(next_arg); - kax_chapters = parse_chapters(next_arg); + kax_chapters = parse_chapters(next_arg, 0, -1, 0, chapter_language, + chapter_charset); i++; } else if (!strcmp(this_arg, "--dump-packets")) { @@ -1782,7 +1822,8 @@ void finish_file() { start = cluster_helper->get_first_timecode() + offset; end = cluster_helper->get_max_timecode() + offset; - chapters_here = parse_chapters(chapter_file_name, start, end, offset); + chapters_here = parse_chapters(chapter_file_name, start, end, offset, + chapter_language, chapter_charset); if (chapters_here != NULL) kax_chapters_void->ReplaceWith(*chapters_here, *out, true); delete kax_chapters_void;