2020-09-06 17:51:58 +00:00
|
|
|
module Mtx::IANALanguageSubtagRegistry
|
2022-03-29 17:56:14 +00:00
|
|
|
# ERB template for the generated C++ source file. It is rendered in
# do_create_cpp, which exposes its local Hash "content_of" to the template
# through the binding passed to ERB#result.
@@list_cpp_content = <<EOERB
|
|
|
|
/*
|
|
|
|
mkvmerge -- utility for splicing together matroska files
|
|
|
|
from component media subtypes
|
|
|
|
|
|
|
|
Distributed under the GPL v2
|
|
|
|
see the file COPYING for details
|
|
|
|
or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
|
|
|
|
|
|
|
|
IANA language subtag registry
|
|
|
|
|
|
|
|
Written by Moritz Bunkus <moritz@bunkus.org>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------------------------------
|
|
|
|
// NOTE: this file is auto-generated by the "dev:iana_language_subtag_registry_list" rake target.
|
|
|
|
// ----------------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
#include "common/common_pch.h"
|
|
|
|
|
|
|
|
#include "common/bcp47.h"
|
|
|
|
#include "common/iana_language_subtag_registry.h"
|
|
|
|
|
|
|
|
namespace mtx::iana::language_subtag_registry {
|
|
|
|
|
|
|
|
std::vector<entry_t> g_extlangs, g_variants, g_grandfathered;
|
|
|
|
std::vector<std::pair<mtx::bcp47::language_c, mtx::bcp47::language_c>> g_preferred_values;
|
2022-03-29 19:15:28 +00:00
|
|
|
std::unordered_map<std::string, std::string> g_suppress_scripts;
|
2022-03-29 17:56:14 +00:00
|
|
|
|
|
|
|
using VS = std::vector<std::string>;
|
|
|
|
|
2022-04-22 20:06:21 +00:00
|
|
|
struct extlang_variant_init_t {
|
|
|
|
char const *code, *description;
|
|
|
|
char const *prefixes[<%= content_of[:max_num_prefixes] + 1 %>];
|
|
|
|
bool is_deprecated;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct suppress_script_init_t {
|
|
|
|
char const *first, *second;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct preferred_values_init_t {
|
|
|
|
struct sub_t {
|
|
|
|
char const *tag, *region, *variant;
|
|
|
|
|
|
|
|
mtx::bcp47::language_c parse() const;
|
|
|
|
};
|
|
|
|
|
|
|
|
sub_t from, to;
|
|
|
|
};
|
|
|
|
|
|
|
|
mtx::bcp47::language_c
|
|
|
|
preferred_values_init_t::sub_t::parse()
|
|
|
|
const {
|
|
|
|
|
|
|
|
auto language = tag ? mtx::bcp47::language_c::parse(tag) : mtx::bcp47::language_c{};
|
|
|
|
|
|
|
|
if (region)
|
|
|
|
language.set_region(region);
|
|
|
|
|
|
|
|
if (variant)
|
|
|
|
language.set_variants({ variant });
|
|
|
|
|
|
|
|
if (!tag)
|
|
|
|
language.set_valid(true);
|
|
|
|
|
|
|
|
return language;
|
|
|
|
}
|
|
|
|
|
|
|
|
static extlang_variant_init_t s_extlangs_init[] = {
|
|
|
|
<%= content_of[:extlangs_init] %>
|
|
|
|
};
|
|
|
|
|
|
|
|
static extlang_variant_init_t s_variants_init[] = {
|
|
|
|
<%= content_of[:variants_init] %>
|
|
|
|
};
|
|
|
|
|
|
|
|
static extlang_variant_init_t s_grandfathered_init[] = {
|
|
|
|
<%= content_of[:grandfathered_init] %>
|
|
|
|
};
|
|
|
|
|
|
|
|
static suppress_script_init_t s_suppress_scripts_init[] = {
|
|
|
|
<%= content_of[:suppress_scripts_init] %>
|
|
|
|
};
|
|
|
|
|
|
|
|
static preferred_values_init_t s_preferred_values_init[] = {
|
|
|
|
<%= content_of[:preferred_values_init] %>
|
|
|
|
};
|
|
|
|
|
2022-03-29 17:56:14 +00:00
|
|
|
void
|
|
|
|
init() {
|
2022-04-22 20:06:21 +00:00
|
|
|
g_extlangs.reserve(<%= content_of[:num_extlangs] %>);
|
|
|
|
|
|
|
|
for (auto const *extlang = s_extlangs_init, *end = extlang + <%= content_of[:num_extlangs] %>; extlang < end; ++extlang) {
|
|
|
|
g_extlangs.emplace_back(extlang->code, extlang->description, extlang->is_deprecated);
|
|
|
|
|
|
|
|
auto &new_extlang = g_extlangs.back();
|
|
|
|
for (auto prefix = extlang->prefixes; *prefix; ++prefix)
|
|
|
|
new_extlang.prefixes.emplace_back(*prefix);
|
|
|
|
}
|
|
|
|
|
|
|
|
g_variants.reserve(<%= content_of[:num_variants] %>);
|
|
|
|
|
|
|
|
for (auto const *variant = s_variants_init, *end = variant + <%= content_of[:num_variants] %>; variant < end; ++variant) {
|
|
|
|
g_variants.emplace_back(variant->code, variant->description, variant->is_deprecated);
|
|
|
|
|
|
|
|
auto &new_variant = g_variants.back();
|
|
|
|
for (auto prefix = variant->prefixes; *prefix; ++prefix)
|
|
|
|
new_variant.prefixes.emplace_back(*prefix);
|
|
|
|
}
|
|
|
|
|
|
|
|
g_suppress_scripts.reserve(<%= content_of[:num_suppress_scripts] %>);
|
|
|
|
|
|
|
|
for (auto const *suppress_script = s_suppress_scripts_init, *end = suppress_script + <%= content_of[:num_suppress_scripts] %>; suppress_script < end; ++suppress_script)
|
|
|
|
g_suppress_scripts.insert_or_assign(suppress_script->first, suppress_script->second);
|
|
|
|
|
|
|
|
g_grandfathered.reserve(<%= content_of[:num_grandfathered] %>);
|
|
|
|
|
|
|
|
for (auto const *grandfathered = s_grandfathered_init, *end = grandfathered + <%= content_of[:num_grandfathered] %>; grandfathered < end; ++grandfathered)
|
|
|
|
g_grandfathered.emplace_back(grandfathered->code, grandfathered->description, grandfathered->is_deprecated);
|
2022-03-29 17:56:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
init_preferred_values() {
|
|
|
|
mtx::bcp47::language_c::set_normalization_mode(mtx::bcp47::normalization_mode_e::none);
|
|
|
|
|
2022-04-22 20:06:21 +00:00
|
|
|
g_preferred_values.reserve(<%= content_of[:num_preferred_values] %>);
|
|
|
|
|
|
|
|
for (auto const *preferred_value = s_preferred_values_init, *end = preferred_value + <%= content_of[:num_preferred_values] %>; preferred_value < end; ++preferred_value)
|
|
|
|
g_preferred_values.emplace_back(preferred_value->from.parse(), preferred_value->to.parse());
|
2022-03-29 17:56:14 +00:00
|
|
|
|
|
|
|
mtx::bcp47::language_c::set_normalization_mode(mtx::bcp47::normalization_mode_e::default_mode);
|
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace mtx::iana::language_subtag_registry
|
|
|
|
EOERB
|
|
|
|
|
|
|
|
|
2021-07-17 09:18:07 +00:00
|
|
|
# Serializes access to @@registry inside fetch_registry.
@@registry_mutex = Mutex.new
|
|
|
|
# Cache for the parsed registry; nil until fetch_registry has filled it.
@@registry = nil
|
2020-09-07 15:49:06 +00:00
|
|
|
|
|
|
|
# Returns the parsed IANA language subtag registry, downloading it on first use.
#
# The result is a Hash that maps each record's "Type" value (a String) to an
# Array of entry Hashes; see parse_registry_text for the entry layout.
#
# The result is cached in @@registry under @@registry_mutex. The cache is only
# filled once download and parsing have succeeded, so a failed download does
# not leave an empty registry behind for later callers.
def self.fetch_registry
  @@registry_mutex.synchronize do
    return @@registry if @@registry

    text = Mtx::OnlineFile.download("https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry")

    @@registry = self.parse_registry_text(text)
  end
end

# Parses the text of the registry into a Hash of "Type" value => Array of entries.
#
# Records are separated by lines consisting of "%%". Within a record:
# * lines starting with a space continue the previous field's value (they are
#   checked first so that a colon inside a folded line is not mistaken for a
#   new field);
# * every "Prefix:" line is collected into the Array entry[:prefix];
# * any other "Name: value" line is stored under the field name, lower-cased
#   with "-" replaced by "_" and converted to a Symbol.
#
# Records without a "Type" field are dropped. For a fixed list of subtags the
# parenthesized parts of the description are removed.
def self.parse_registry_text text
  shorten_description_for = %w{1959acad abl1943 ao1990 colb1945}

  registry    = {}
  entry       = {}
  current_sym = nil

  finish_entry = lambda do
    description = entry[:description]

    if description && shorten_description_for.include?(entry[:subtag])
      description.gsub!(%r{ +\(.*?\)}, '')
    end

    type = entry[:type]
    (registry[type] ||= []) << entry if type

    entry       = {}
    current_sym = nil
  end

  text.split(%r{\n+}).map(&:chomp).each do |line|
    if line == '%%'
      finish_entry.call

    elsif (m = %r{^ +(.+)}.match(line))
      # Continuations of "Prefix" lines (current_sym is nil) are ignored.
      entry[current_sym] += " #{m[1]}" if current_sym

    elsif (m = %r{^Prefix: *(.+)}i.match(line))
      (entry[:prefix] ||= []) << m[1]
      current_sym = nil

    elsif (m = %r{^(.*?): *(.+)}i.match(line))
      current_sym        = m[1].downcase.gsub(%r{-}, '_').to_sym
      entry[current_sym] = m[2]
    end
  end

  # The last record is not followed by a "%%" separator.
  finish_entry.call

  registry
end
|
|
|
|
|
2022-03-28 16:45:19 +00:00
|
|
|
# Downloads the ISDCF language list and returns the value stored under the
# "data" key of the parsed JSON document.
def self.fetch_isdcf_languages
  json = Mtx::OnlineFile.download("https://registry.isdcf.com/languages", "isdcf_languages")
  JSON.parse(json)["data"]
end
|
|
|
|
|
2022-03-25 21:07:27 +00:00
|
|
|
# Turns one registry entry into the four cells of an extlang_variant_init_t
# initializer row: code, description, NULL-terminated prefix list and the
# "is deprecated" flag ("true" if the entry has a :deprecated key).
def self.format_one_extlang_variant entry
  prefixes    = entry[:prefix]
  prefix_cell = if prefixes
                  "{ #{prefixes.sort.map(&:to_c_string).join(', ')}, NULL }"
                else
                  '{ NULL }'
                end

  [
    entry[:subtag].downcase.to_c_string,
    entry[:description].to_u8_c_string,
    prefix_cell,
    entry.key?(:deprecated).to_s,
  ]
end
|
|
|
|
|
|
|
|
# Formats all entries of the given registry type as C++ initializer rows.
#
# Returns a two-element Array: the number of entries and the sorted rows
# joined by newlines. The "name" parameter is not used.
def self.format_extlangs_variants entries, type, name
  type_entries = entries[type]
  sorted_rows  = type_entries.map { |entry| self.format_one_extlang_variant entry }.sort
  table        = format_table(sorted_rows, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },")

  [ type_entries.size, table.join("\n") ]
end
|
2020-07-04 18:01:29 +00:00
|
|
|
|
2022-03-25 21:07:27 +00:00
|
|
|
# Turns one "grandfathered" registry entry into the four cells of an
# extlang_variant_init_t initializer row. Such rows never carry prefixes and
# are always emitted with the "is deprecated" flag set to true.
def self.format_one_grandfathered entry
  tag_cell         = entry[:tag].to_c_string
  description_cell = entry[:description].to_u8_c_string

  [ tag_cell, description_cell, '{ NULL }', 'true' ]
end
|
2022-03-24 21:39:32 +00:00
|
|
|
|
2022-03-25 21:07:27 +00:00
|
|
|
# Formats all "grandfathered" registry entries as C++ initializer rows.
#
# Returns a two-element Array: the number of entries and the sorted rows
# joined by newlines.
def self.format_grandfathered entries
  grandfathered = entries["grandfathered"]
  sorted_rows   = grandfathered.map { |entry| self.format_one_grandfathered entry }.sort
  table         = format_table(sorted_rows, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },")

  [ grandfathered.size, table.join("\n") ]
end
|
|
|
|
|
2022-03-25 23:21:58 +00:00
|
|
|
# Classifies the original (to-be-replaced) value of a preferred-value mapping:
# anything containing a "-" is a full :tag, everything else keeps the
# registry type of its entry, converted to a Symbol.
def self.preferred_value_type_original type, pv
  if %r{-}.match(pv)
    :tag
  else
    type.to_sym
  end
end
|
|
|
|
|
|
|
|
# Classifies the replacement value of a preferred-value mapping: anything
# containing a "-" or consisting of two or three lower-case letters is a
# :tag, everything else keeps the registry type of its entry as a Symbol.
def self.preferred_value_type_target type, pv
  if %r{-|^[a-z]{2,3}$}.match(pv)
    :tag
  else
    type.to_sym
  end
end
|
|
|
|
|
|
|
|
# Builds the cells that initialize one C++ preferred_values_init_t::sub_t
# (members: tag, region, variant).
#
# pv_type:: :tag or :language put the value into the tag slot, :region into
#           the region slot and :variant into the variant slot.
# pv::      the tag or subtag String; it is emitted via String#to_c_string.
#
# Returns [ "{ <tag>", <region>, <variant>, "}" ] with "NULL" in unused slots.
#
# Raises RuntimeError for any other pv_type. The previous check tested the
# three cells for nil/false, which could never be true as unused cells hold
# the String "NULL"; an unknown type silently produced an all-NULL row.
def self.format_one_preferred_value_construction pv_type, pv
  slot = case pv_type
         when :tag, :language then 0
         when :region         then 1
         when :variant        then 2
         else fail "unknown pv_type #{pv_type}"
         end

  cells       = %w{NULL NULL NULL}
  cells[slot] = pv.to_c_string

  [ "{ #{cells[0]}", cells[1], cells[2], "}" ]
end
|
2022-03-25 21:07:27 +00:00
|
|
|
|
2022-03-25 23:21:58 +00:00
|
|
|
# Builds the initializer cells for the replacement side of a preferred-value
# mapping: classifies pv with preferred_value_type_target and formats it with
# format_one_preferred_value_construction.
#
# The earlier implementation called "preferred_value_type", which is not
# defined in this module, and otherwise duplicated the construction helper.
def self.format_one_preferred_value_target type, pv
  pv_type = self.preferred_value_type_target type, pv

  self.format_one_preferred_value_construction pv_type, pv
end
|
|
|
|
|
|
|
|
# Builds one preferred_values_init_t initializer row for a registry entry:
# the cells for entry[:original_value] followed by the cells for
# entry[:preferred_value].
def self.format_one_preferred_value entry
  type      = entry[:type]
  original  = entry[:original_value]
  preferred = entry[:preferred_value]

  from_cells = self.format_one_preferred_value_construction(self.preferred_value_type_original(type, original), original)
  to_cells   = self.format_one_preferred_value_construction(self.preferred_value_type_target(type, preferred), preferred)

  from_cells + to_cells
end
|
|
|
|
|
2022-03-28 16:45:19 +00:00
|
|
|
# Builds one preferred_values_init_t initializer row for an ISDCF entry,
# mapping its "dcncTag" to its "rfc5646Tag"; both sides are treated as tags.
def self.format_one_preferred_value_isdcf entry
  %w{dcncTag rfc5646Tag}.flat_map do |key|
    self.format_one_preferred_value_construction(:tag, entry[key])
  end
end
|
|
|
|
|
|
|
|
# Formats all preferred-value mappings as C++ initializer rows.
#
# Registry entries that have a :preferred_value come first. Each of them gets
# an :original_value key stored in the entry itself (first prefix plus "-" if
# present, followed by the subtag or tag). They are ordered by the number of
# "-" in that value, most first, then by the lower-cased value. ISDCF entries
# whose "dcncTag" matches Q[A-T][A-Z] and whose "rfc5646Tag" is not blank are
# appended in their given order.
#
# Returns a two-element Array: the number of rows and the rows joined by
# newlines.
def self.format_preferred_values entries, isdcf_entries
  with_preferred_value = entries.values.flat_map do |list|
    list.select { |e| e.key?(:preferred_value) }
  end

  with_preferred_value.each do |e|
    leading            = e.key?(:prefix) ? e[:prefix].first + "-" : ""
    e[:original_value] = leading + (e[:subtag] || e[:tag])
  end

  ordered = with_preferred_value.sort_by do |e|
    original = e[:original_value]
    [ 10 - original.gsub(%r{[^-]+}, '').length, original.downcase ]
  end

  rows = ordered.map { |e| self.format_one_preferred_value e }

  isdcf_entries.each do |e|
    next unless %r{^Q[A-T][A-Z]$}.match(e["dcncTag"] || "")
    next if e["rfc5646Tag"].blank?

    rows << self.format_one_preferred_value_isdcf(e)
  end

  table = format_table(rows, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },")

  [ rows.size, table.join("\n") ]
end
|
|
|
|
|
2022-03-29 19:15:28 +00:00
|
|
|
# Formats the suppress-script mappings of all "language" and "extlang"
# entries as C++ initializer rows of (tag or subtag, script), sorted and
# without duplicates.
#
# Returns a two-element Array: the number of rows and the rows joined by
# newlines.
def self.format_suppress_scripts entries
  candidates = entries["language"] + entries["extlang"]

  pairs = candidates.
    reject { |e| e[:suppress_script].blank? }.
    map    { |e| [ e[:tag] || e[:subtag], e[:suppress_script] ] }.
    sort.
    uniq

  rows  = pairs.map { |pair| pair.map(&:to_c_string) }
  table = format_table(rows, :column_suffix => ",", :row_prefix => " { ", :row_suffix => " },")

  [ rows.size, table.join("\n") ]
end
|
2022-03-29 19:15:28 +00:00
|
|
|
|
2022-04-22 20:06:21 +00:00
|
|
|
# Returns the largest number of prefixes any "extlang" or "variant" entry
# has; entries without a :prefix key count as zero. Returns nil if there are
# no such entries at all.
def self.calculate_max_num_prefixes entries
  candidates = [ entries["extlang"], entries["variant"] ].flatten

  prefix_counts = candidates.map do |entry|
    prefixes = entry[:prefix]
    prefixes ? prefixes.length : 0
  end

  prefix_counts.max
end
|
|
|
|
|
2022-03-28 16:45:19 +00:00
|
|
|
# Renders the C++ list file from the parsed registry and the ISDCF entries
# and writes it to src/common/iana_language_subtag_registry_list.cpp below
# $source_dir.
def self.do_create_cpp entries, isdcf_entries
  cpp_file_name = "src/common/iana_language_subtag_registry_list.cpp"

  # The template accesses this local by the name "content_of" through the
  # binding, so it must not be renamed.
  content_of = { :max_num_prefixes => self.calculate_max_num_prefixes(entries) }

  # Each builder returns [ number of rows, formatted rows ]; they are run in
  # the listed order.
  builders = [
    [ "extlangs",         lambda { self.format_extlangs_variants(entries, "extlang", "extlangs") } ],
    [ "variants",         lambda { self.format_extlangs_variants(entries, "variant", "variants") } ],
    [ "suppress_scripts", lambda { self.format_suppress_scripts(entries) } ],
    [ "grandfathered",    lambda { self.format_grandfathered(entries) } ],
    [ "preferred_values", lambda { self.format_preferred_values(entries, isdcf_entries) } ],
  ]

  builders.each do |table_name, builder|
    content_of["num_#{table_name}".to_sym], content_of["#{table_name}_init".to_sym] = builder.call
  end

  content = ERB.new(@@list_cpp_content).result(binding)

  runq("write", cpp_file_name) do
    IO.write("#{$source_dir}/#{cpp_file_name}", content)
    0
  end
end
|
2020-07-01 17:29:09 +00:00
|
|
|
|
2020-09-06 17:51:58 +00:00
|
|
|
# Fetches the IANA registry and the ISDCF language list and generates the
# C++ list file from them.
def self.create_cpp
  registry      = self.fetch_registry
  isdcf_entries = self.fetch_isdcf_languages

  do_create_cpp(registry, isdcf_entries)
end
|
2020-07-01 17:29:09 +00:00
|
|
|
end
|