mirror of
https://gitlab.com/mbunkus/mkvtoolnix.git
synced 2024-12-23 19:31:44 +00:00
GUI: BCP47: show warning if script should be suppressed
Part of the implementation of #3307.
This commit is contained in:
parent
3c52a6d26d
commit
c50e582fa4
3
NEWS.md
3
NEWS.md
@ -66,6 +66,9 @@
|
||||
variant's list of suitable prefixes. It'll also say if the corresponding
|
||||
canonical/extlang forms would have a suitable prefix. Part of the
|
||||
implementation of #3307.
|
||||
* MKVToolNix GUI: IETF BCP 47/RFC 5646 language tags: the language dialog now
|
||||
shows a warning if a script is used with a language for which it should be
|
||||
suppressed. Part of the implementation of #3307.
|
||||
|
||||
## Bug fixes
|
||||
|
||||
|
@ -26,6 +26,7 @@ namespace mtx::iana::language_subtag_registry {
|
||||
|
||||
std::vector<entry_t> g_extlangs, g_variants, g_grandfathered;
|
||||
std::vector<std::pair<mtx::bcp47::language_c, mtx::bcp47::language_c>> g_preferred_values;
|
||||
std::unordered_map<std::string, std::string> g_suppress_scripts;
|
||||
|
||||
using VS = std::vector<std::string>;
|
||||
|
||||
@ -207,6 +208,19 @@ EOERB
|
||||
format_table(rows, :column_suffix => ',', :row_prefix => " g_preferred_values.emplace_back(", :row_suffix => ");").join("\n")
|
||||
end
|
||||
|
||||
# Produces the C++ statements that populate `g_suppress_scripts`.
#
# All "language" and "extlang" registry entries that carry a non-blank
# :suppress_script are turned into [code, script] pairs (the code being the
# entry's :tag, or its :subtag if there is no :tag). The pairs are sorted,
# de-duplicated and converted to C++ string literals. The result is a
# `reserve()` call followed by one `insert_or_assign()` call per pair.
def self.format_suppress_scripts entries
  var_name   = "g_suppress_scripts"
  candidates = entries["language"] + entries["extlang"]

  pairs = candidates.
    reject { |entry| entry[:suppress_script].blank? }.
    map    { |entry| [ entry[:tag] || entry[:subtag], entry[:suppress_script] ] }.
    sort.
    uniq

  rows         = pairs.map { |pair| pair.map(&:to_cpp_string) }
  reserve_line = " #{var_name}.reserve(#{rows.size});\n\n"
  insert_lines = format_table(rows, :column_suffix => ",", :row_prefix => " #{var_name}.insert_or_assign(", :row_suffix => ");")

  reserve_line + insert_lines.join("\n")
end
|
||||
|
||||
def self.do_create_cpp entries, isdcf_entries
|
||||
cpp_file_name = "src/common/iana_language_subtag_registry_list.cpp"
|
||||
|
||||
@ -214,6 +228,7 @@ EOERB
|
||||
content_of[:init] = [
|
||||
self.format_extlangs_variants(entries, "extlang", "extlangs"), "",
|
||||
self.format_extlangs_variants(entries, "variant", "variants"), "",
|
||||
self.format_suppress_scripts(entries), "",
|
||||
self.format_grandfathered(entries),
|
||||
].join("\n")
|
||||
content_of[:init_preferred_values] = self.format_preferred_values(entries, isdcf_entries)
|
||||
|
@ -918,6 +918,32 @@ language_c::to_extlang_form() {
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool
|
||||
language_c::should_script_be_suppressed()
|
||||
const noexcept {
|
||||
if (m_script.empty())
|
||||
return false;
|
||||
|
||||
auto check = [this](std::string const &code) -> bool {
|
||||
if (code.empty())
|
||||
return false;
|
||||
|
||||
auto language = mtx::iso639::look_up(code);
|
||||
if (!language)
|
||||
return false;
|
||||
|
||||
auto const &suppressions = mtx::iana::language_subtag_registry::g_suppress_scripts;
|
||||
auto itr = suppressions.find(language->alpha_3_code);
|
||||
|
||||
if ((itr == suppressions.end()) && !language->alpha_2_code.empty())
|
||||
itr = suppressions.find(language->alpha_2_code);
|
||||
|
||||
return (itr != suppressions.end()) && (mtx::string::to_lower_ascii(itr->second) == mtx::string::to_lower_ascii(m_script));
|
||||
};
|
||||
|
||||
return check(m_language) || check(m_extended_language_subtag);
|
||||
}
|
||||
|
||||
void
|
||||
language_c::disable() {
|
||||
ms_disabled = true;
|
||||
|
@ -112,6 +112,7 @@ public:
|
||||
std::string const &get_grandfathered() const noexcept;
|
||||
|
||||
std::string get_first_variant_not_matching_prefixes() const noexcept;
|
||||
bool should_script_be_suppressed() const noexcept;
|
||||
|
||||
protected:
|
||||
std::string format_internal(bool force) const noexcept;
|
||||
|
@ -37,6 +37,7 @@ struct entry_t {
|
||||
|
||||
extern std::vector<entry_t> g_extlangs, g_variants, g_grandfathered;
|
||||
extern std::vector< std::pair<mtx::bcp47::language_c, mtx::bcp47::language_c> > g_preferred_values;
|
||||
// Maps a language or extlang subtag to the script that should be suppressed for it (e.g. "de" -> "Latn").
extern std::unordered_map<std::string, std::string> g_suppress_scripts;
|
||||
|
||||
void init();
|
||||
void init_preferred_values();
|
||||
|
@ -24,6 +24,7 @@ namespace mtx::iana::language_subtag_registry {
|
||||
|
||||
std::vector<entry_t> g_extlangs, g_variants, g_grandfathered;
|
||||
std::vector<std::pair<mtx::bcp47::language_c, mtx::bcp47::language_c>> g_preferred_values;
|
||||
std::unordered_map<std::string, std::string> g_suppress_scripts;
|
||||
|
||||
using VS = std::vector<std::string>;
|
||||
|
||||
@ -395,6 +396,143 @@ init() {
|
||||
g_variants.emplace_back("wadegile"s, u8"Wade-Giles romanization"s, VS{ "zh-Latn"s }, false);
|
||||
g_variants.emplace_back("xsistemo"s, u8"Standard X-system orthographic fallback for spelling Esperanto"s, VS{ "eo"s }, false);
|
||||
|
||||
g_suppress_scripts.reserve(134);
|
||||
|
||||
g_suppress_scripts.insert_or_assign("ab"s, "Cyrl"s);
|
||||
g_suppress_scripts.insert_or_assign("af"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("am"s, "Ethi"s);
|
||||
g_suppress_scripts.insert_or_assign("ar"s, "Arab"s);
|
||||
g_suppress_scripts.insert_or_assign("as"s, "Beng"s);
|
||||
g_suppress_scripts.insert_or_assign("ay"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("be"s, "Cyrl"s);
|
||||
g_suppress_scripts.insert_or_assign("bg"s, "Cyrl"s);
|
||||
g_suppress_scripts.insert_or_assign("bn"s, "Beng"s);
|
||||
g_suppress_scripts.insert_or_assign("bs"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("ca"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("ch"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("cs"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("cy"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("da"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("de"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("dsb"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("dv"s, "Thaa"s);
|
||||
g_suppress_scripts.insert_or_assign("dz"s, "Tibt"s);
|
||||
g_suppress_scripts.insert_or_assign("el"s, "Grek"s);
|
||||
g_suppress_scripts.insert_or_assign("en"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("eo"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("es"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("et"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("eu"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("fa"s, "Arab"s);
|
||||
g_suppress_scripts.insert_or_assign("fi"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("fj"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("fo"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("fr"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("frr"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("frs"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("fy"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("ga"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("gl"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("gn"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("gsw"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("gu"s, "Gujr"s);
|
||||
g_suppress_scripts.insert_or_assign("gv"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("he"s, "Hebr"s);
|
||||
g_suppress_scripts.insert_or_assign("hi"s, "Deva"s);
|
||||
g_suppress_scripts.insert_or_assign("hr"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("hsb"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("ht"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("hu"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("hy"s, "Armn"s);
|
||||
g_suppress_scripts.insert_or_assign("id"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("in"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("is"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("it"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("iw"s, "Hebr"s);
|
||||
g_suppress_scripts.insert_or_assign("ja"s, "Jpan"s);
|
||||
g_suppress_scripts.insert_or_assign("ka"s, "Geor"s);
|
||||
g_suppress_scripts.insert_or_assign("kk"s, "Cyrl"s);
|
||||
g_suppress_scripts.insert_or_assign("kl"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("km"s, "Khmr"s);
|
||||
g_suppress_scripts.insert_or_assign("kn"s, "Knda"s);
|
||||
g_suppress_scripts.insert_or_assign("ko"s, "Kore"s);
|
||||
g_suppress_scripts.insert_or_assign("kok"s, "Deva"s);
|
||||
g_suppress_scripts.insert_or_assign("la"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("lb"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("ln"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("lo"s, "Laoo"s);
|
||||
g_suppress_scripts.insert_or_assign("lt"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("lv"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("mai"s, "Deva"s);
|
||||
g_suppress_scripts.insert_or_assign("men"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("mg"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("mh"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("mk"s, "Cyrl"s);
|
||||
g_suppress_scripts.insert_or_assign("ml"s, "Mlym"s);
|
||||
g_suppress_scripts.insert_or_assign("mo"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("mr"s, "Deva"s);
|
||||
g_suppress_scripts.insert_or_assign("ms"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("mt"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("my"s, "Mymr"s);
|
||||
g_suppress_scripts.insert_or_assign("na"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("nb"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("nd"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("nds"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("ne"s, "Deva"s);
|
||||
g_suppress_scripts.insert_or_assign("niu"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("nl"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("nn"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("no"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("nqo"s, "Nkoo"s);
|
||||
g_suppress_scripts.insert_or_assign("nr"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("nso"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("ny"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("om"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("or"s, "Orya"s);
|
||||
g_suppress_scripts.insert_or_assign("pa"s, "Guru"s);
|
||||
g_suppress_scripts.insert_or_assign("pl"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("ps"s, "Arab"s);
|
||||
g_suppress_scripts.insert_or_assign("pt"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("qu"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("rm"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("rn"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("ro"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("ru"s, "Cyrl"s);
|
||||
g_suppress_scripts.insert_or_assign("rw"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("sg"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("si"s, "Sinh"s);
|
||||
g_suppress_scripts.insert_or_assign("sk"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("sl"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("sm"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("so"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("sq"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("ss"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("st"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("sv"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("sw"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("ta"s, "Taml"s);
|
||||
g_suppress_scripts.insert_or_assign("te"s, "Telu"s);
|
||||
g_suppress_scripts.insert_or_assign("tem"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("th"s, "Thai"s);
|
||||
g_suppress_scripts.insert_or_assign("ti"s, "Ethi"s);
|
||||
g_suppress_scripts.insert_or_assign("tkl"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("tl"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("tmh"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("tn"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("to"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("tpi"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("tr"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("ts"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("tvl"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("uk"s, "Cyrl"s);
|
||||
g_suppress_scripts.insert_or_assign("ur"s, "Arab"s);
|
||||
g_suppress_scripts.insert_or_assign("ve"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("vi"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("xh"s, "Latn"s);
|
||||
g_suppress_scripts.insert_or_assign("yi"s, "Hebr"s);
|
||||
g_suppress_scripts.insert_or_assign("zbl"s, "Blis"s);
|
||||
g_suppress_scripts.insert_or_assign("zu"s, "Latn"s);
|
||||
|
||||
g_grandfathered.reserve(26);
|
||||
|
||||
g_grandfathered.emplace_back("art-lojban"s, u8"Lojban"s, VS{}, true);
|
||||
|
@ -456,6 +456,10 @@ LanguageDialog::determineInfoAndWarningsFor(mtx::bcp47::language_c const &tag) {
|
||||
lists.second << QY("The script '%1' is deprecated.").arg(Q(tag.get_script()));
|
||||
}
|
||||
|
||||
if (tag.should_script_be_suppressed())
|
||||
lists.second << QY("The script '%1' should not be used for the language '%2' as it is the script the overwhelming majority of documents for this language is written in.")
|
||||
.arg(Q(tag.get_script())).arg(Q(tag.get_language().empty() ? tag.get_extended_language_subtag() : tag.get_language()));
|
||||
|
||||
if (!tag.get_region().empty()) {
|
||||
auto region = mtx::iso3166::look_up(tag.get_region());
|
||||
if (region && region->is_deprecated)
|
||||
|
@ -659,4 +659,14 @@ TEST(BCP47LanguageTags, VariantPrefixValidation) {
|
||||
EXPECT_EQ("ijekavsk"s, l.get_first_variant_not_matching_prefixes());
|
||||
}
|
||||
|
||||
// Exercises language_c::should_script_be_suppressed(): tags without a script
// must yield false, while "Latn" combined with German must be reported no
// matter how the subtags are cased or which code form ("de", "deu", "ger")
// is used for the language.
TEST(BCP47LanguageTags, ShouldScriptBeSuppressed) {
|
||||
EXPECT_FALSE(language_c::parse("de").should_script_be_suppressed()); // no script to suppress
|
||||
EXPECT_FALSE(language_c::parse("de-CH").should_script_be_suppressed()); // no script to suppress
|
||||
EXPECT_TRUE(language_c::parse("de-Latn").should_script_be_suppressed());
|
||||
EXPECT_TRUE(language_c::parse("de-Latn-CH").should_script_be_suppressed()); // a region does not change the result
|
||||
EXPECT_TRUE(language_c::parse("de-lATN-CH").should_script_be_suppressed()); // script casing is irrelevant
|
||||
EXPECT_TRUE(language_c::parse("deu-LAtN-Ch").should_script_be_suppressed()); // three-letter language code
|
||||
EXPECT_TRUE(language_c::parse("ger-latn-ch").should_script_be_suppressed()); // alternative three-letter language code
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user