From 65f599e407990f8600f05b06442c84f482573ec7 Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Sun, 27 Mar 2022 11:55:50 +0200 Subject: [PATCH] mkvpropedit: add option for normalizing IETF BCP 47 language tags Part of the implementation of #3307. --- src/propedit/propedit_cli_parser.cpp | 18 ++++++++++++- src/propedit/propedit_cli_parser.h | 1 + tests/results.txt | 3 +++ ...propedit_normalize_language_ietf_tracks.rb | 23 +++++++++++++++++ ...opedit_normalize_language_ietf_chapters.rb | 24 ++++++++++++++++++ ...40propedit_normalize_language_ietf_tags.rb | 25 +++++++++++++++++++ 6 files changed, 93 insertions(+), 1 deletion(-) create mode 100755 tests/test-0738propedit_normalize_language_ietf_tracks.rb create mode 100755 tests/test-0739propedit_normalize_language_ietf_chapters.rb create mode 100755 tests/test-0740propedit_normalize_language_ietf_tags.rb diff --git a/src/propedit/propedit_cli_parser.cpp b/src/propedit/propedit_cli_parser.cpp index 5f4fddea3..718afdeff 100644 --- a/src/propedit/propedit_cli_parser.cpp +++ b/src/propedit/propedit_cli_parser.cpp @@ -214,6 +214,19 @@ propedit_cli_parser_c::disable_language_ietf() { mtx::bcp47::language_c::disable(); } +void +propedit_cli_parser_c::set_language_ietf_normalization_mode() { /* Handler bound to the "normalize-language-ietf=" option in init_parser(): validates the mode name held in m_next_arg and passes the matching enum value to mtx::bcp47::language_c::set_normalization_mode(). */ + if (m_next_arg.empty()) /* the option was given without a mode name */ + mxerror(fmt::format(Y("'{0}' lacks its argument.\n"), "--normalize-language-ietf")); + + if ((m_next_arg != "canonical"s) && (m_next_arg != "extlang"s)) /* only these two mode names are accepted */ + mxerror(fmt::format(Y("'{0}' is not a valid language normalization mode.\n"), m_next_arg)); + + auto mode = m_next_arg == "canonical"s ? 
mtx::bcp47::normalization_mode_e::canonical : mtx::bcp47::normalization_mode_e::extlang; /* anything other than "canonical" maps to extlang here; this assumes mxerror() above does not return for invalid input - TODO confirm */ + + mtx::bcp47::language_c::set_normalization_mode(mode); +} + void propedit_cli_parser_c::enable_legacy_font_mime_types() { g_use_legacy_font_mime_types = true; @@ -252,7 +265,8 @@ propedit_cli_parser_c::init_parser() { add_option("attachment-uid=", std::bind(&propedit_cli_parser_c::set_attachment_uid, this), YT("Set the UID to use for the following '--add-attachment', '--replace-attachment' or '--update-attachment' option")); add_section_header(YT("Other options")); - add_option("disable-language-ietf", std::bind(&propedit_cli_parser_c::disable_language_ietf, this), YT("Do not change LanguageIETF track header elements when the 'language' property is changed.")); + add_option("disable-language-ietf", std::bind(&propedit_cli_parser_c::disable_language_ietf, this), YT("Do not change LanguageIETF track header elements when the 'language' property is changed.")); + add_option("normalize-language-ietf=", std::bind(&propedit_cli_parser_c::set_language_ietf_normalization_mode, this), YT("Normalize all IETF BCP 47 language tags of changed elements to either their canonical form (mode 'canonical') or their extended language subtags form (mode 'extlang')")); add_common_options(); add_separator(); @@ -281,6 +295,8 @@ propedit_cli_parser_c::init_parser() { add_information(YT("3. 
Either 'name:<value>' or 'mime-type:<value>' in which case the selector applies to all attachments whose name or MIME type respectively equals <value>."), 2); add_hook(mtx::cli::parser_c::ht_unknown_option, std::bind(&propedit_cli_parser_c::set_file_name, this)); + + set_to_parse_first("--normalize-language-ietf"); } void diff --git a/src/propedit/propedit_cli_parser.h b/src/propedit/propedit_cli_parser.h index a293f6be4..4bdc97918 100644 --- a/src/propedit/propedit_cli_parser.h +++ b/src/propedit/propedit_cli_parser.h @@ -40,6 +40,7 @@ protected: void set_file_name(); void disable_language_ietf(); void enable_legacy_font_mime_types(); + void set_language_ietf_normalization_mode(); void set_attachment_name(); void set_attachment_description(); diff --git a/tests/results.txt b/tests/results.txt index c18ff58f6..378fcb73c 100644 --- a/tests/results.txt +++ b/tests/results.txt @@ -582,3 +582,6 @@ T_0734legacy_language_code_fallback_via_extlang_prefix:27e262aae722c5306f6ffcfbe T_0735normalize_language_ietf_tracks:fc2fbd7d3978298f5e0a91b4cc222075-ok-46cd0d3e23f2293952e01cd72758ce25-ok-536dc6a75677c959a8fcccc0a389d651-ok:passed:20220327-103026:0.315131033 T_0736normalize_language_ietf_chapters:60865f8a77e2ba933357027ced7dda3b-ok-336926f4fed38eaaa240d260df7789fc-ok-a0ee62bbac0b3cdc4d8d285b9571eefe-ok:passed:20220327-103029:0.173048279 T_0737normalize_language_ietf_tags:b23b6fb1821313e506780642ba35435e-ok-b477e432c6701cbdbcdf4734bc524d91-ok-ed6c2ba405d9e276aca9a5cf0c540406-ok:passed:20220327-103814:0.178980995 +T_0738propedit_normalize_language_ietf_tracks:8c830d72272956b13cc467192241c52d-f727657a1cc3e8898503676c058de2a8-ok-8c830d72272956b13cc467192241c52d-667ca2882b82259f833bf4b4a4375e06-ok-8c830d72272956b13cc467192241c52d-99381838032c53bc1d4230c85bdaf9c0-ok:passed:20220327-114045:0.365823403 
+T_0739propedit_normalize_language_ietf_chapters:fd026c48afdf123a9d500ad456f751d4-eb76e15349d1cbe0de47a82f7bcd8250-ok-fd026c48afdf123a9d500ad456f751d4-284c1493450e4a988846d6a2ced87cb8-ok-fd026c48afdf123a9d500ad456f751d4-f55da6687176bbda38205705d6f27d27-ok:passed:20220327-114610:0.353790859 +T_0740propedit_normalize_language_ietf_tags:f45e6f666f85b477a0d8c2296bea9291-66edf4dcad3528ff3506ae287979780c-ok-f45e6f666f85b477a0d8c2296bea9291-1f75e9469442a031d4602181b1151dbb-ok-f45e6f666f85b477a0d8c2296bea9291-f6b69443dab50658f393ce212103b347-ok:passed:20220327-115423:0.36762433 diff --git a/tests/test-0738propedit_normalize_language_ietf_tracks.rb b/tests/test-0738propedit_normalize_language_ietf_tracks.rb new file mode 100755 index 000000000..67aea8baa --- /dev/null +++ b/tests/test-0738propedit_normalize_language_ietf_tracks.rb @@ -0,0 +1,23 @@ +#!/usr/bin/ruby -w + +# T_738propedit_normalize_language_ietf_tracks +describe "mkvpropedit / normalize language IETF in tracks" + +def test0738 expected_languages, normalization_mode = nil + src = "data/subtitles/srt/ven.srt" + + test_merge src, :args => "#{src} #{src}", :keep_tmp => true + + test "propedit #{expected_languages}" do + normalization_mode = normalization_mode ? 
"--normalize-language-ietf #{normalization_mode}" : "" + + propedit tmp, "--edit track:1 --set language=fr-fx --edit track:2 --set language=zh-yue --edit track:3 --set language=yue #{normalization_mode}" + hash_tmp false + end + + compare_languages_tracks(*expected_languages) +end + +test0738 [ [ "fre", "fr-FX" ], [ "chi", "zh-yue" ], [ "chi", "yue" ] ] +test0738 [ [ "fre", "fr-FR" ], [ "chi", "yue" ], [ "chi", "yue" ] ], :canonical +test0738 [ [ "fre", "fr-FR" ], [ "chi", "zh-yue" ], [ "chi", "zh-yue" ] ], :extlang diff --git a/tests/test-0739propedit_normalize_language_ietf_chapters.rb b/tests/test-0739propedit_normalize_language_ietf_chapters.rb new file mode 100755 index 000000000..f81217153 --- /dev/null +++ b/tests/test-0739propedit_normalize_language_ietf_chapters.rb @@ -0,0 +1,24 @@ +#!/usr/bin/ruby -w + +# T_739propedit_normalize_language_ietf_chapters +describe "mkvpropedit / normalize language IETF in chapters" + +def test0739 expected_languages, normalization_mode = nil + src1 = "data/subtitles/srt/ven.srt" + src2 = "data/chapters/ietf-normalization-test.xml" + + test_merge src1, :keep_tmp => true + + test "propedit #{expected_languages}" do + normalization_mode = normalization_mode ? 
"--normalize-language-ietf #{normalization_mode}" : "" + + propedit tmp, "--chapters #{src2} #{normalization_mode}" + hash_tmp false + end + + compare_languages_chapters(*expected_languages) +end + +test0739 [ [ "fre", "fr-FX" ], [ "chi", "zh-yue" ], [ "chi", "yue" ] ] +test0739 [ [ "fre", "fr-FR" ], [ "chi", "yue" ], [ "chi", "yue" ] ], :canonical +test0739 [ [ "fre", "fr-FR" ], [ "chi", "zh-yue" ], [ "chi", "zh-yue" ] ], :extlang diff --git a/tests/test-0740propedit_normalize_language_ietf_tags.rb b/tests/test-0740propedit_normalize_language_ietf_tags.rb new file mode 100755 index 000000000..32ae6fd57 --- /dev/null +++ b/tests/test-0740propedit_normalize_language_ietf_tags.rb @@ -0,0 +1,25 @@ +#!/usr/bin/ruby -w + +# T_740propedit_normalize_language_ietf_tags +describe "mkvpropedit / normalize language IETF in tags" + +def test0740 expected_languages, normalization_mode = nil + src1 = "data/subtitles/srt/ven.srt" + src2 = "data/tags/one-tag-fr-FX.xml" + src3 = "data/tags/ietf-normalization-test.xml" + + test_merge src1, :args => "--tags 0:#{src2}", :keep_tmp => true + + test "propedit #{expected_languages}" do + normalization_mode = normalization_mode ? "--normalize-language-ietf #{normalization_mode}" : "" + + propedit tmp, "--tags global:#{src3} #{normalization_mode}" + hash_tmp false + end + + compare_languages_tags(*expected_languages) +end + +test0740 [ [ "fre", "fr-FX" ], [ "fre", "fr-FX" ], [ "chi", "zh-yue" ], [ "chi", "yue" ] ] +test0740 [ [ "fre", "fr-FX" ], [ "fre", "fr-FR" ], [ "chi", "yue" ], [ "chi", "yue" ] ], :canonical +test0740 [ [ "fre", "fr-FX" ], [ "fre", "fr-FR" ], [ "chi", "zh-yue" ], [ "chi", "zh-yue" ] ], :extlang