# Builds src/common/iana_language_subtag_registry_list.cpp from the IANA
# language subtag registry and the ISDCF language list.
#
# Entry point is +create_cpp+; the other module functions fetch and parse the
# source data or format it into C++ initializer rows for the ERB template.
module Mtx::IANALanguageSubtagRegistry
  # ERB template for the generated C++ file. It is evaluated with a local
  # +content_of+ hash in scope (see +do_create_cpp+).
  #
  # NOTE(review): in the copy of this file that was reviewed, the heredoc
  # opener, the license header and the C++ template arguments
  # (std::vector<...>, std::unordered_map<...>) were not legible and have been
  # filled in. Verify the header text and the element type name (entry_t)
  # against the canonical file and common/iana_language_subtag_registry.h.
  @@list_cpp_content = <<-EOERB
/*
   mkvmerge -- utility for splicing together matroska files
   from component media subtypes

   Distributed under the GPL v2
   see the file COPYING for details
   or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html

   IANA language subtag registry

   Written by Moritz Bunkus <moritz@bunkus.org>.
*/

// ----------------------------------------------------------------------------------------------
// NOTE: this file is auto-generated by the "dev:iana_language_subtag_registry_list" rake target.
// ----------------------------------------------------------------------------------------------

#include "common/common_pch.h"

#include "common/bcp47.h"
#include "common/iana_language_subtag_registry.h"

namespace mtx::iana::language_subtag_registry {

std::vector<entry_t> g_extlangs, g_variants, g_grandfathered;
std::vector<std::pair<mtx::bcp47::language_c, mtx::bcp47::language_c>> g_preferred_values;
std::unordered_map<std::string, std::string> g_suppress_scripts;

using VS = std::vector<std::string>;

struct extlang_variant_init_t {
  char const *code, *description;
  char const *prefixes[<%= content_of[:max_num_prefixes] + 1 %>];
  bool is_deprecated;
};

struct suppress_script_init_t {
  char const *first, *second;
};

struct preferred_values_init_t {
  struct sub_t {
    char const *tag, *region, *variant;

    mtx::bcp47::language_c parse() const;
  };

  sub_t from, to;
};

mtx::bcp47::language_c
preferred_values_init_t::sub_t::parse()
  const {
  auto language = tag ? mtx::bcp47::language_c::parse(tag) : mtx::bcp47::language_c{};

  if (region)
    language.set_region(region);

  if (variant)
    language.set_variants({ variant });

  if (!tag)
    language.set_valid(true);

  return language;
}

static extlang_variant_init_t s_extlangs_init[] = {
<%= content_of[:extlangs_init] %>
};

static extlang_variant_init_t s_variants_init[] = {
<%= content_of[:variants_init] %>
};

static extlang_variant_init_t s_grandfathered_init[] = {
<%= content_of[:grandfathered_init] %>
};

static suppress_script_init_t s_suppress_scripts_init[] = {
<%= content_of[:suppress_scripts_init] %>
};

static preferred_values_init_t s_preferred_values_init[] = {
<%= content_of[:preferred_values_init] %>
};

void
init() {
  g_extlangs.reserve(<%= content_of[:num_extlangs] %>);

  for (auto const *extlang = s_extlangs_init, *end = extlang + <%= content_of[:num_extlangs] %>; extlang < end; ++extlang) {
    g_extlangs.emplace_back(extlang->code, extlang->description, extlang->is_deprecated);
    auto &new_extlang = g_extlangs.back();

    for (auto prefix = extlang->prefixes; *prefix; ++prefix)
      new_extlang.prefixes.emplace_back(*prefix);
  }

  g_variants.reserve(<%= content_of[:num_variants] %>);

  for (auto const *variant = s_variants_init, *end = variant + <%= content_of[:num_variants] %>; variant < end; ++variant) {
    g_variants.emplace_back(variant->code, variant->description, variant->is_deprecated);
    auto &new_variant = g_variants.back();

    for (auto prefix = variant->prefixes; *prefix; ++prefix)
      new_variant.prefixes.emplace_back(*prefix);
  }

  g_suppress_scripts.reserve(<%= content_of[:num_suppress_scripts] %>);

  for (auto const *suppress_script = s_suppress_scripts_init, *end = suppress_script + <%= content_of[:num_suppress_scripts] %>; suppress_script < end; ++suppress_script)
    g_suppress_scripts.insert_or_assign(suppress_script->first, suppress_script->second);

  g_grandfathered.reserve(<%= content_of[:num_grandfathered] %>);

  for (auto const *grandfathered = s_grandfathered_init, *end = grandfathered + <%= content_of[:num_grandfathered] %>; grandfathered < end; ++grandfathered)
    g_grandfathered.emplace_back(grandfathered->code, grandfathered->description, grandfathered->is_deprecated);
}

void
init_preferred_values() {
  mtx::bcp47::language_c::set_normalization_mode(mtx::bcp47::normalization_mode_e::none);

  g_preferred_values.reserve(<%= content_of[:num_preferred_values] %>);

  for (auto const *preferred_value = s_preferred_values_init, *end = preferred_value + <%= content_of[:num_preferred_values] %>; preferred_value < end; ++preferred_value)
    g_preferred_values.emplace_back(preferred_value->from.parse(), preferred_value->to.parse());

  mtx::bcp47::language_c::set_normalization_mode(mtx::bcp47::normalization_mode_e::default_mode);
}

} // namespace mtx::iana::language_subtag_registry
  EOERB

  # Guards the lazily populated registry cache below.
  @@registry_mutex = Mutex.new
  @@registry       = nil

  # Downloads and parses the IANA registry once and caches the result.
  #
  # Returns a hash mapping the "Type" field of each record (a String) to an
  # array of entry hashes keyed by the downcased, symbolized field names.
  def self.fetch_registry
    @@registry_mutex.synchronize do
      # Only cache after a successful download + parse so that a failed
      # download does not leave an empty registry behind for later callers.
      @@registry ||= parse_registry(Mtx::OnlineFile.download("https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry"))
    end
  end

  # Parses the registry text: records are separated by "%%" lines, fields are
  # "Key: value" lines and lines starting with spaces continue the previous
  # field. "Prefix" fields are collected into an array; continuation lines
  # after a "Prefix" field are dropped.
  def self.parse_registry text
    shorten_description_for = %w{1959acad abl1943 ao1990 colb1945}

    registry    = {}
    entry       = {}
    current_sym = nil

    finish_entry = lambda do
      # For a handful of subtags the parenthesized part of the description is
      # removed.
      if shorten_description_for.include? entry[:subtag]
        entry[:description].gsub!(%r{ +\(.*?\)}, '')
      end

      type = entry[:type]
      (registry[type] ||= []) << entry if type

      entry = {}
    end

    text.
      split(%r{\n+}).
      map(&:chomp).
      each do |line|

      if line == '%%'
        finish_entry.call
        current_sym = nil

      elsif %r{^ +(.+)}.match(line)
        # Continuation lines must be recognized before the generic
        # "key: value" pattern; otherwise folded text containing a colon
        # would be mistaken for a new field.
        entry[current_sym] += " #{$1}" if current_sym

      elsif %r{^Prefix: *(.+)}i.match(line)
        entry[:prefix] ||= []
        entry[:prefix]  << $1
        current_sym      = nil

      elsif %r{^(.*?): *(.+)}i.match(line)
        key, value         = $1, $2
        current_sym        = key.downcase.gsub(%r{-}, '_').to_sym
        entry[current_sym] = value
      end
    end

    # The last record is not followed by a "%%" separator.
    finish_entry.call

    registry
  end
  private_class_method :parse_registry

  # Downloads the ISDCF language list and returns the content of its "data"
  # member.
  def self.fetch_isdcf_languages
    JSON.parse(Mtx::OnlineFile.download("https://registry.isdcf.com/languages", "isdcf_languages"))["data"]
  end

  # Returns the four initializer columns (code, description, NULL-terminated
  # prefix list, deprecation flag) for one extlang or variant entry.
  def self.format_one_extlang_variant entry
    if entry[:prefix]
      prefix = '{ ' + entry[:prefix].sort.map(&:to_c_string).join(', ') + ', NULL }'
    else
      prefix = '{ NULL }'
    end

    [ entry[:subtag].downcase.to_c_string,
      entry[:description].to_u8_c_string,
      prefix,
      entry.key?(:deprecated).to_s,
    ]
  end

  # Returns the number of entries of the given registry type and the formatted
  # initializer table. +name+ is unused; it is kept so existing callers keep
  # working.
  def self.format_extlangs_variants entries, type, name
    rows = entries[type].map { |entry| self.format_one_extlang_variant entry }

    return entries[type].size, format_table(rows.sort, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },").join("\n")
  end

  # Returns the initializer columns for one grandfathered tag; such entries
  # have no prefixes and are always flagged as deprecated.
  def self.format_one_grandfathered entry
    [ entry[:tag].to_c_string,
      entry[:description].to_u8_c_string,
      '{ NULL }',
      'true',
    ]
  end

  # Returns the number of grandfathered entries and their formatted table.
  def self.format_grandfathered entries
    rows = entries["grandfathered"].map { |entry| self.format_one_grandfathered entry }

    return entries["grandfathered"].size, format_table(rows.sort, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },").join("\n")
  end

  # Classifies the value to be replaced: anything containing a hyphen is a
  # full tag, otherwise the entry's own type is used.
  def self.preferred_value_type_original type, pv
    return %r{-}.match(pv) ? :tag : type.to_sym
  end

  # Classifies the replacement value: anything containing a hyphen or
  # consisting of two or three lowercase letters is a tag, otherwise the
  # entry's own type is used.
  def self.preferred_value_type_target type, pv
    return %r{-|^[a-z]{2,3}$}.match(pv) ? :tag : type.to_sym
  end

  # Returns the columns of one "{ tag, region, variant }" initializer with
  # exactly one member set depending on +pv_type+.
  #
  # Raises if +pv_type+ is none of :tag, :language, :region or :variant.
  def self.format_one_preferred_value_construction pv_type, pv
    # Validate the type itself: the three columns below are always non-nil
    # strings and therefore cannot be used to detect an unknown type.
    fail "unknown pv_type #{pv_type}" unless [:tag, :language, :region, :variant].include?(pv_type)

    pv_str  = pv.to_c_string
    tag     = [:tag, :language].include?(pv_type) ? pv_str : "NULL"
    region  = :region  == pv_type                 ? pv_str : "NULL"
    variant = :variant == pv_type                 ? pv_str : "NULL"

    [ "{ #{tag}", region, variant, "}" ]
  end

  # Returns the initializer columns for a replacement value of an entry of
  # the given registry type.
  def self.format_one_preferred_value_target type, pv
    self.format_one_preferred_value_construction(self.preferred_value_type_target(type, pv), pv)
  end

  # Returns the columns for one registry entry's "from" and "to" initializers.
  def self.format_one_preferred_value entry
    return self.format_one_preferred_value_construction(self.preferred_value_type_original(entry[:type], entry[:original_value]),  entry[:original_value]) \
         + self.format_one_preferred_value_construction(self.preferred_value_type_target(  entry[:type], entry[:preferred_value]), entry[:preferred_value])
  end

  # Returns the columns for one ISDCF entry; both sides are treated as tags.
  def self.format_one_preferred_value_isdcf entry
    return self.format_one_preferred_value_construction(:tag, entry["dcncTag"]) \
         + self.format_one_preferred_value_construction(:tag, entry["rfc5646Tag"])
  end

  # Returns the number of preferred-value rows and the formatted table.
  #
  # Registry entries with a preferred value come first, ordered so that values
  # with more hyphens precede those with fewer, then case-insensitively by
  # value. ISDCF entries whose "dcncTag" matches Q[A-T][A-Z] and that have a
  # non-blank "rfc5646Tag" are appended afterwards.
  def self.format_preferred_values entries, isdcf_entries
    rows = entries.
      values.
      map     { |v| v.select { |e| e.key?(:preferred_value) } }.
      flatten.
      map     { |e| e[:original_value] = (e.key?(:prefix) ? e[:prefix].first + "-" : "") + (e[:subtag] || e[:tag]); e }.
      sort_by { |e| [ 10 - e[:original_value].gsub(%r{[^-]+}, '').length, e[:original_value].downcase ] }.
      map     { |e| self.format_one_preferred_value e }

    rows += isdcf_entries.
      select { |e| %r{^Q[A-T][A-Z]$}.match(e["dcncTag"] || "") and !e["rfc5646Tag"].blank? }.
      map    { |e| self.format_one_preferred_value_isdcf e }

    return rows.size, format_table(rows, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },").join("\n")
  end

  # Returns the number of suppress-script rows and the formatted table built
  # from all language and extlang entries that carry a "Suppress-Script"
  # field.
  def self.format_suppress_scripts entries
    rows = (entries["language"] + entries["extlang"]).
      select { |e| !e[:suppress_script].blank? }.
      map    { |e| [ e[:tag] || e[:subtag], e[:suppress_script] ] }.
      sort.
      uniq.
      map    { |p| p.map(&:to_c_string) }

    return rows.size, format_table(rows, :column_suffix => ",", :row_prefix => " { ", :row_suffix => " },").join("\n")
  end

  # Returns the largest number of prefixes any extlang or variant entry has,
  # or 0 if there are no such entries. The template adds one slot for the
  # terminating NULL.
  def self.calculate_max_num_prefixes entries
    %w{extlang variant}.
      map { |type| entries[type] || [] }.
      flatten.
      map { |entry| (entry[:prefix] || []).length }.
      max || 0
  end

  # Formats all tables, renders the template and writes the C++ file below
  # $source_dir.
  def self.do_create_cpp entries, isdcf_entries
    cpp_file_name = "src/common/iana_language_subtag_registry_list.cpp"

    # Read by the ERB template through the binding passed to +result+.
    content_of                                                             = Hash.new
    content_of[:max_num_prefixes]                                          = self.calculate_max_num_prefixes(entries)
    content_of[:num_extlangs],         content_of[:extlangs_init]          = self.format_extlangs_variants(entries, "extlang", "extlangs")
    content_of[:num_variants],         content_of[:variants_init]          = self.format_extlangs_variants(entries, "variant", "variants")
    content_of[:num_suppress_scripts], content_of[:suppress_scripts_init]  = self.format_suppress_scripts(entries)
    content_of[:num_grandfathered],    content_of[:grandfathered_init]     = self.format_grandfathered(entries)
    content_of[:num_preferred_values], content_of[:preferred_values_init]  = self.format_preferred_values(entries, isdcf_entries)

    content = ERB.new(@@list_cpp_content).result(binding)

    runq("write", cpp_file_name) { IO.write("#{$source_dir}/#{cpp_file_name}", content); 0 }
  end

  # Fetches both data sources and regenerates the C++ file.
  def self.create_cpp
    do_create_cpp(self.fetch_registry, self.fetch_isdcf_languages)
  end
end