mirror of
https://gitlab.com/mbunkus/mkvtoolnix.git
synced 2024-12-24 11:54:01 +00:00
BCP47: add function to convert to canonical form
Part of the implementation of #3307.
This commit is contained in:
parent
37d48d5d2f
commit
5b6aab5f32
@ -783,6 +783,79 @@ language_c::find_best_match(std::vector<language_c> const &potential_matches)
|
||||
return best_match;
|
||||
}
|
||||
|
||||
/** Applies the first applicable entry of the registry's preferred-values list.

   The list is scanned in order; the first entry whose key this tag
   matches (see matches()) is used, all later entries are ignored.

   If the replacement names a language, every component that is set in
   the matched key is cleared in this tag first. Afterwards every
   non-empty component of the replacement overwrites the corresponding
   component of this tag. Components that are set neither in the key
   nor in the replacement are left untouched.

   \returns a reference to this object, modified or not.
*/
language_c &
language_c::canonicalize_preferred_values() {
  // Clears one of our components if the matched key has that component set.
  auto drop_if_matched = [](auto const &matched_part, auto &own_part) {
    if (!matched_part.empty())
      own_part.clear();
  };

  // Overwrites one of our components if the replacement provides a value for it.
  auto take_if_present = [](auto const &preferred_part, auto &own_part) {
    if (!preferred_part.empty())
      own_part = preferred_part;
  };

  for (auto const &entry : mtx::iana::language_subtag_registry::g_preferred_values) {
    if (!matches(entry.first))
      continue;

    auto const &obsolete    = entry.first;
    auto const &replacement = entry.second;

    // Only strip the matched components if the replacement names a language.
    if (!replacement.m_language.empty()) {
      drop_if_matched(obsolete.m_language,                  m_language);
      drop_if_matched(obsolete.m_extended_language_subtags, m_extended_language_subtags);
      drop_if_matched(obsolete.m_script,                    m_script);
      drop_if_matched(obsolete.m_region,                    m_region);
      drop_if_matched(obsolete.m_variants,                  m_variants);
      drop_if_matched(obsolete.m_extensions,                m_extensions);
      drop_if_matched(obsolete.m_private_use,               m_private_use);
      drop_if_matched(obsolete.m_grandfathered,             m_grandfathered);
    }

    take_if_present(replacement.m_language,                  m_language);
    take_if_present(replacement.m_extended_language_subtags, m_extended_language_subtags);
    take_if_present(replacement.m_script,                    m_script);
    take_if_present(replacement.m_region,                    m_region);
    take_if_present(replacement.m_variants,                  m_variants);
    take_if_present(replacement.m_extensions,                m_extensions);
    take_if_present(replacement.m_private_use,               m_private_use);
    take_if_present(replacement.m_grandfathered,             m_grandfathered);

    // Only the first matching entry is ever applied.
    return *this;
  }

  // No entry matched; nothing to change.
  return *this;
}
|
||||
|
||||
/** Converts this tag to its canonical form in place.

   At present the only step performed is the substitution of preferred
   values done by canonicalize_preferred_values().

   \returns a reference to this object so that calls can be chained.
*/
language_c &
language_c::to_canonical_form() {
  canonicalize_preferred_values();

  return *this;
}
|
||||
|
||||
void
|
||||
language_c::disable() {
|
||||
ms_disabled = true;
|
||||
|
@ -77,6 +77,8 @@ public:
|
||||
bool matches(language_c const &match) const noexcept;
|
||||
language_c find_best_match(std::vector<language_c> const &potential_matches) const noexcept;
|
||||
|
||||
// Rewrites this tag in place into its canonical form (currently: applies the registry's preferred values) and returns *this for chaining.
language_c &to_canonical_form();
|
||||
|
||||
language_c &set_valid(bool valid);
|
||||
language_c &set_language(std::string const &language);
|
||||
language_c &set_extended_language_subtags(std::vector<std::string> const &extended_language_subtags);
|
||||
@ -113,6 +115,8 @@ protected:
|
||||
bool validate_one_extlang(std::size_t extlang_index);
|
||||
bool matches_prefix(language_c const &prefix, std::size_t extlang_index, prefix_restrictions_t const &restrictions) const noexcept;
|
||||
|
||||
// Applies the first entry of the IANA registry's preferred-values list that this tag matches; returns *this, unchanged if no entry matches.
language_c &canonicalize_preferred_values();
|
||||
|
||||
public:
|
||||
static language_c parse(std::string const &language);
|
||||
|
||||
|
@ -423,4 +423,26 @@ TEST(BCP47LanguageTags, Grandfathered) {
|
||||
EXPECT_EQ("i-KLINGON"s, l.get_grandfathered());
|
||||
}
|
||||
|
||||
// Verifies that to_canonical_form() leaves canonical tags alone and
// replaces deprecated/redundant/grandfathered tags by their preferred values.
TEST(BCP47LanguageTags, ToCanonicalForm) {
  // Parses a tag, canonicalizes it and returns the formatted result.
  auto canonical = [](std::string const &tag) {
    return language_c::parse(tag).to_canonical_form().format();
  };

  // Already in canonical form: must come back unchanged.
  EXPECT_EQ("sgn"s, canonical("sgn"s));
  EXPECT_EQ("nsi"s, canonical("nsi"s));

  // Listed as redundant in the registry but without a preferred value: unchanged, too.
  EXPECT_EQ("az-Arab"s, canonical("az-Arab"s));

  // Tags with preferred values: these must be rewritten.
  EXPECT_EQ("nsi"s,             canonical("sgn-nsi"s));
  EXPECT_EQ("ja-Latn-alalc97"s, canonical("ja-Latn-hepburn-heploc"s));
  EXPECT_EQ("jbo"s,             canonical("art-lojban"s));
  EXPECT_EQ("jsl"s,             canonical("sgn-JP"s));
  EXPECT_EQ("cmn"s,             canonical("zh-cmn"s));
  EXPECT_EQ("cmn-CN"s,          canonical("zh-cmn-CN"s));
  EXPECT_EQ("cmn-Hans"s,        canonical("zh-cmn-Hans"s));
  EXPECT_EQ("cmn"s,             canonical("zh-guoyu"s));
  EXPECT_EQ("hak"s,             canonical("zh-hakka"s));
  EXPECT_EQ("hak"s,             canonical("i-hak"s));
  EXPECT_EQ("yue-jyutping"s,    canonical("zh-yue-jyutping"s));
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user