BCP47: only sort extensions during canonicalization, not during parsing

Part of the implementation of #3307.
This commit is contained in:
Moritz Bunkus 2022-03-26 16:18:52 +01:00
parent 71036260ac
commit 23a00653b7
No known key found for this signature in database
GPG Key ID: 74AF00ADF2E32C85
2 changed files with 16 additions and 6 deletions

View File

@ -323,8 +323,6 @@ language_c::parse_extensions(std::string const &str) {
else
m_extensions.back().extensions.emplace_back(part);
std::sort(m_extensions.begin(), m_extensions.end());
return validate_extensions();
}
@ -640,9 +638,7 @@ language_c::add_extension(extension_t const &extension) {
for (auto const &extension_subtag : extension.extensions)
extensions_lower.emplace_back(mtx::string::to_lower_ascii(extension_subtag));
auto cleaned_extension = extension_t{ mtx::string::to_lower_ascii(extension.identifier), extensions_lower };
m_extensions.insert(std::lower_bound(m_extensions.begin(), m_extensions.end(), cleaned_extension), cleaned_extension);
m_extensions.emplace_back(mtx::string::to_lower_ascii(extension.identifier), extensions_lower);
m_formatted_up_to_date = false;
return *this;
@ -851,6 +847,8 @@ language_c &
language_c::to_canonical_form() {
// Brings this tag into canonical form: the extensions are put into sorted
// order and the result of canonicalize_preferred_values() is returned.
//
// The tag is about to be modified, so flag the formatted representation as
// out of date (presumably format() rebuilds it on the next call — confirm).
m_formatted_up_to_date = false;
// Ordering of extensions is applied here, as part of canonicalization; the
// accompanying tests expect e.g. "u-… t-…" to become "t-… u-…" only after
// to_canonical_form() has been called.
std::sort(m_extensions.begin(), m_extensions.end());
return canonicalize_preferred_values();
}

View File

@ -91,6 +91,18 @@ TEST(BCP47LanguageTags, Formatting) {
l.set_valid(true);
EXPECT_EQ("ja-t-test-u-attr-co-phonebk-attr-zz-oooqqq"s, l.format());
l = language_c{};
l.set_language("ja");
l.add_extension({ "u"s, { "attr"s, "co"s, "phonebk"s, "attr"s, "zz"s, "oooqqq"s }});
l.add_extension({ "t"s, { "test"s }});
l.set_valid(true);
EXPECT_EQ("ja-u-attr-co-phonebk-attr-zz-oooqqq-t-test"s, l.format());
l.to_canonical_form();
EXPECT_EQ("ja-t-test-u-attr-co-phonebk-attr-zz-oooqqq"s, l.format());
}
TEST(BCP47LanguageTags, FormattingInvalidWithoutLanguage) {
@ -319,7 +331,7 @@ TEST(BCP47LanguageTags, ExtensionsRFC6497) {
TEST(BCP47LanguageTags, ExtensionsFormatting) {
// Mixed-case input comes back lower-cased from parse().format(); extensions
// already given in "t", "u" order stay in that order.
EXPECT_EQ("ja-t-test-u-attr-co-phonebk"s, language_c::parse("ja-T-Test-U-AttR-CO-phoNEbk").format());
// NOTE(review): the two expectations below feed the same input to parse() but
// expect different output, so they cannot both hold. This looks like the old
// and the new version of a single line shown together without diff markers;
// the second one (input order "u", "t" preserved) is the one consistent with
// sorting extensions only during canonicalization — confirm before relying on it.
EXPECT_EQ("ja-t-test-u-attr-co-phonebk"s, language_c::parse("ja-U-AttR-CO-phoNEbk-T-Test").format());
EXPECT_EQ("ja-u-attr-co-phonebk-t-test"s, language_c::parse("ja-U-AttR-CO-phoNEbk-T-Test").format());
}
TEST(BCP47LanguageTags, Matching) {