mirror of
https://gitlab.com/mbunkus/mkvtoolnix.git
synced 2024-12-24 11:54:01 +00:00
BCP47: add function for finding the first variant not matching its prefixes
Part of the implementation of #3307.
This commit is contained in:
parent
e6bcf00a6d
commit
3048c61d92
@ -379,16 +379,9 @@ language_c::matches_prefix(language_c const &prefix,
|
||||
}
|
||||
|
||||
bool
|
||||
language_c::validate_extlang() {
|
||||
if (m_extended_language_subtag.empty())
|
||||
return true;
|
||||
|
||||
auto extlang = mtx::iana::language_subtag_registry::look_up_extlang(m_extended_language_subtag);
|
||||
|
||||
if (!extlang) // Should not happen as the parsing checks this already.
|
||||
return false;
|
||||
|
||||
if (extlang->prefixes.empty())
|
||||
language_c::validate_prefixes(std::vector<std::string> const &prefixes)
|
||||
const noexcept {
|
||||
if (prefixes.empty())
|
||||
return true;
|
||||
|
||||
prefix_restrictions_t restrictions;
|
||||
@ -399,7 +392,7 @@ language_c::validate_extlang() {
|
||||
value = true;
|
||||
};
|
||||
|
||||
for (auto const &prefix : extlang->prefixes) {
|
||||
for (auto const &prefix : prefixes) {
|
||||
parsed_prefixes.emplace_back(parse(prefix));
|
||||
auto const &tag = parsed_prefixes.back();
|
||||
|
||||
@ -414,6 +407,44 @@ language_c::validate_extlang() {
|
||||
if (matches_prefix(parsed_prefix, restrictions))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string
|
||||
language_c::get_first_variant_not_matching_prefixes()
|
||||
const noexcept {
|
||||
if (m_variants.empty())
|
||||
return {};
|
||||
|
||||
for (auto const &variant_str : m_variants) {
|
||||
auto variant = mtx::iana::language_subtag_registry::look_up_variant(variant_str);
|
||||
|
||||
if (!variant) // Should not happen as the parsing checks this already.
|
||||
continue;
|
||||
|
||||
if (variant->prefixes.empty())
|
||||
continue;
|
||||
|
||||
if (!validate_prefixes(variant->prefixes))
|
||||
return variant_str;
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
bool
|
||||
language_c::validate_extlang() {
|
||||
if (m_extended_language_subtag.empty())
|
||||
return true;
|
||||
|
||||
auto extlang = mtx::iana::language_subtag_registry::look_up_extlang(m_extended_language_subtag);
|
||||
|
||||
if (!extlang) // Should not happen as the parsing checks this already.
|
||||
return false;
|
||||
|
||||
if (validate_prefixes(extlang->prefixes))
|
||||
return true;
|
||||
|
||||
auto message = Y("The extended language subtag '{}' must only be used with one of the following prefixes: {}.");
|
||||
m_parser_error = fmt::format(message, m_extended_language_subtag, fmt::join(extlang->prefixes, ", "));
|
||||
|
||||
|
@ -111,6 +111,8 @@ public:
|
||||
std::vector<std::string> const &get_private_use() const noexcept;
|
||||
std::string const &get_grandfathered() const noexcept;
|
||||
|
||||
std::string get_first_variant_not_matching_prefixes() const noexcept;
|
||||
|
||||
protected:
|
||||
std::string format_internal(bool force) const noexcept;
|
||||
|
||||
@ -124,6 +126,7 @@ protected:
|
||||
bool validate_extensions();
|
||||
bool validate_extlang();
|
||||
bool validate_variants();
|
||||
bool validate_prefixes(std::vector<std::string> const &prefixes) const noexcept;
|
||||
bool matches_prefix(language_c const &prefix, prefix_restrictions_t const &restrictions) const noexcept;
|
||||
|
||||
language_c &canonicalize_preferred_values();
|
||||
|
@ -567,4 +567,76 @@ TEST(BCP47LanguageTags, NormalizationForDCNCTags) {
|
||||
EXPECT_EQ("pt-BR"s, language_c::parse("QBP"s).format());
|
||||
}
|
||||
|
||||
// Exercises language_c::get_first_variant_not_matching_prefixes() with
// the normalization mode set to norm_e::none. Each case parses a tag,
// checks that it is valid and then compares the returned variant: an
// empty string means no variant was reported, otherwise the offending
// variant is expected. Two cases re-check the same object after calling
// to_canonical_form() / to_extlang_form() on it.
TEST(BCP47LanguageTags, VariantPrefixValidation) {
|
||||
language_c::set_normalization_mode(norm_e::none);
|
||||
|
||||
auto l = language_c::parse("pt-BR"s);
|
||||
EXPECT_TRUE(l.is_valid());
|
||||
EXPECT_EQ(""s, l.get_first_variant_not_matching_prefixes()); // no variant
|
||||
|
||||
l = language_c::parse("da-DK-fonipa"s);
|
||||
EXPECT_TRUE(l.is_valid());
|
||||
EXPECT_EQ(""s, l.get_first_variant_not_matching_prefixes()); // variant without prefixes
|
||||
|
||||
l = language_c::parse("de-1996"s);
|
||||
EXPECT_TRUE(l.is_valid());
|
||||
EXPECT_EQ(""s, l.get_first_variant_not_matching_prefixes()); // prefixes valid
|
||||
|
||||
l = language_c::parse("de-DE-1996"s);
|
||||
EXPECT_TRUE(l.is_valid());
|
||||
EXPECT_EQ(""s, l.get_first_variant_not_matching_prefixes()); // prefixes valid
|
||||
|
||||
l = language_c::parse("fr-1996"s);
|
||||
EXPECT_TRUE(l.is_valid());
|
||||
EXPECT_EQ("1996"s, l.get_first_variant_not_matching_prefixes()); // prefixes invalid: language code doesn't match
|
||||
|
||||
l = language_c::parse("pt-BR-abl1943"s);
|
||||
EXPECT_TRUE(l.is_valid());
|
||||
EXPECT_EQ(""s, l.get_first_variant_not_matching_prefixes()); // prefixes valid: pt-BR is a valid prefix
|
||||
|
||||
l = language_c::parse("pt-abl1943"s);
|
||||
EXPECT_TRUE(l.is_valid());
|
||||
EXPECT_EQ("abl1943"s, l.get_first_variant_not_matching_prefixes()); // prefixes invalid: pt-BR is valid, pt isn't
|
||||
|
||||
l = language_c::parse("zh-cmn-Latn-tongyong"s);
|
||||
EXPECT_TRUE(l.is_valid());
|
||||
EXPECT_EQ(""s, l.get_first_variant_not_matching_prefixes()); // prefixes valid
|
||||
|
||||
l = language_c::parse("yue-jyutping"s);
|
||||
EXPECT_TRUE(l.is_valid());
|
||||
EXPECT_EQ(""s, l.get_first_variant_not_matching_prefixes()); // prefixes valid
|
||||
|
||||
l = language_c::parse("zh-yue-jyutping"s);
|
||||
EXPECT_TRUE(l.is_valid());
|
||||
EXPECT_EQ("jyutping"s, l.get_first_variant_not_matching_prefixes()); // prefixes invalid: yue would be a valid prefix but zh-yue isn't
|
||||
|
||||
l.to_canonical_form();
|
||||
EXPECT_TRUE(l.is_valid());
|
||||
EXPECT_EQ(""s, l.get_first_variant_not_matching_prefixes()); // prefixes valid: zh-yue-jyutping canonicals to yue-jyutping & yue is a valid prefix
|
||||
|
||||
l = language_c::parse("cmn-pinyin"s);
|
||||
EXPECT_TRUE(l.is_valid());
|
||||
EXPECT_EQ("pinyin"s, l.get_first_variant_not_matching_prefixes()); // prefixes invalid: missing Latn
|
||||
|
||||
l = language_c::parse("zh-cmn-pinyin"s);
|
||||
EXPECT_TRUE(l.is_valid());
|
||||
EXPECT_EQ("pinyin"s, l.get_first_variant_not_matching_prefixes()); // prefixes invalid: missing Latn
|
||||
|
||||
l = language_c::parse("cmn-Latn-pinyin"s);
|
||||
EXPECT_TRUE(l.is_valid());
|
||||
EXPECT_EQ("pinyin"s, l.get_first_variant_not_matching_prefixes()); // prefixes invalid: only valid for zh-Latn, not cmn-Latn
|
||||
|
||||
l.to_extlang_form();
|
||||
EXPECT_TRUE(l.is_valid());
|
||||
EXPECT_EQ(""s, l.get_first_variant_not_matching_prefixes()); // prefixes valid now as extlang form is zh-cmn-Latn-pinyin
|
||||
|
||||
l = language_c::parse("zh-cmn-Latn-pinyin"s);
|
||||
EXPECT_TRUE(l.is_valid());
|
||||
EXPECT_EQ(""s, l.get_first_variant_not_matching_prefixes()); // prefixes valid (directly)
|
||||
|
||||
l = language_c::parse("zh-cmn-Hans-pinyin"s);
|
||||
EXPECT_TRUE(l.is_valid());
|
||||
EXPECT_EQ("pinyin"s, l.get_first_variant_not_matching_prefixes()); // prefixes invalid: script not Latn
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user