# mkvtoolnix/rake.d/iana_language_subtag_registry.rb

module Mtx::IANALanguageSubtagRegistry
# ERB template for the generated C++ source file. do_create_cpp renders it
# with ERB#result(binding), so every <%= content_of[...] %> placeholder below
# reads the local hash named content_of in that method. Everything between
# the heredoc markers is emitted verbatim into the generated file.
@@list_cpp_content = <<EOERB
/*
mkvmerge -- utility for splicing together matroska files
from component media subtypes
Distributed under the GPL v2
see the file COPYING for details
or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
IANA language subtag registry
Written by Moritz Bunkus <moritz@bunkus.org>.
*/
// ----------------------------------------------------------------------------------------------
// NOTE: this file is auto-generated by the "dev:iana_language_subtag_registry_list" rake target.
// ----------------------------------------------------------------------------------------------
#include "common/common_pch.h"
#include "common/bcp47.h"
#include "common/iana_language_subtag_registry.h"
namespace mtx::iana::language_subtag_registry {
std::vector<entry_t> g_extlangs, g_variants, g_grandfathered;
std::vector<std::pair<mtx::bcp47::language_c, mtx::bcp47::language_c>> g_preferred_values;
std::unordered_map<std::string, std::string> g_suppress_scripts;
using VS = std::vector<std::string>;
struct extlang_variant_init_t {
char const *code, *description;
char const *prefixes[<%= content_of[:max_num_prefixes] + 1 %>];
bool is_deprecated;
};
struct suppress_script_init_t {
char const *first, *second;
};
struct preferred_values_init_t {
struct sub_t {
char const *tag, *region, *variant;
mtx::bcp47::language_c parse() const;
};
sub_t from, to;
};
mtx::bcp47::language_c
preferred_values_init_t::sub_t::parse()
const {
auto language = tag ? mtx::bcp47::language_c::parse(tag) : mtx::bcp47::language_c{};
if (region)
language.set_region(region);
if (variant)
language.set_variants({ variant });
if (!tag)
language.set_valid(true);
return language;
}
static extlang_variant_init_t s_extlangs_init[] = {
<%= content_of[:extlangs_init] %>
};
static extlang_variant_init_t s_variants_init[] = {
<%= content_of[:variants_init] %>
};
static extlang_variant_init_t s_grandfathered_init[] = {
<%= content_of[:grandfathered_init] %>
};
static suppress_script_init_t s_suppress_scripts_init[] = {
<%= content_of[:suppress_scripts_init] %>
};
static preferred_values_init_t s_preferred_values_init[] = {
<%= content_of[:preferred_values_init] %>
};
void
init() {
g_extlangs.reserve(<%= content_of[:num_extlangs] %>);
for (auto const *extlang = s_extlangs_init, *end = extlang + <%= content_of[:num_extlangs] %>; extlang < end; ++extlang) {
g_extlangs.emplace_back(extlang->code, extlang->description, extlang->is_deprecated);
auto &new_extlang = g_extlangs.back();
for (auto prefix = extlang->prefixes; *prefix; ++prefix)
new_extlang.prefixes.emplace_back(*prefix);
}
g_variants.reserve(<%= content_of[:num_variants] %>);
for (auto const *variant = s_variants_init, *end = variant + <%= content_of[:num_variants] %>; variant < end; ++variant) {
g_variants.emplace_back(variant->code, variant->description, variant->is_deprecated);
auto &new_variant = g_variants.back();
for (auto prefix = variant->prefixes; *prefix; ++prefix)
new_variant.prefixes.emplace_back(*prefix);
}
g_suppress_scripts.reserve(<%= content_of[:num_suppress_scripts] %>);
for (auto const *suppress_script = s_suppress_scripts_init, *end = suppress_script + <%= content_of[:num_suppress_scripts] %>; suppress_script < end; ++suppress_script)
g_suppress_scripts.insert_or_assign(suppress_script->first, suppress_script->second);
g_grandfathered.reserve(<%= content_of[:num_grandfathered] %>);
for (auto const *grandfathered = s_grandfathered_init, *end = grandfathered + <%= content_of[:num_grandfathered] %>; grandfathered < end; ++grandfathered)
g_grandfathered.emplace_back(grandfathered->code, grandfathered->description, grandfathered->is_deprecated);
}
void
init_preferred_values() {
mtx::bcp47::language_c::set_normalization_mode(mtx::bcp47::normalization_mode_e::none);
g_preferred_values.reserve(<%= content_of[:num_preferred_values] %>);
for (auto const *preferred_value = s_preferred_values_init, *end = preferred_value + <%= content_of[:num_preferred_values] %>; preferred_value < end; ++preferred_value)
g_preferred_values.emplace_back(preferred_value->from.parse(), preferred_value->to.parse());
mtx::bcp47::language_c::set_normalization_mode(mtx::bcp47::normalization_mode_e::default_mode);
}
} // namespace mtx::iana::language_subtag_registry
EOERB
# Serializes access to the cached registry inside fetch_registry.
@@registry_mutex = Mutex.new
# Cache for the parsed registry; nil until fetch_registry has populated it.
@@registry = nil
# Downloads and parses the IANA language subtag registry, caching the result
# in @@registry so later calls return the cached hash without downloading.
#
# Returns a Hash that maps each record's "Type" value (e.g. "language",
# "extlang", "grandfathered") to an Array of entry hashes. Entry keys are
# the field names lower-cased, with "-" turned into "_", as symbols
# (:subtag, :description, :preferred_value, ...). All "Prefix" fields of a
# record are collected into an Array stored under :prefix. Records without a
# "Type" field are dropped.
#
# The cache is only assigned after the whole file has been parsed, so an
# exception raised by the download or the parser cannot leave an empty or
# half-filled registry behind for subsequent callers.
def self.fetch_registry
  @@registry_mutex.synchronize {
    return @@registry if @@registry

    # Subtags whose description has its parenthesized part stripped.
    shorten_description_for = %w{1959acad abl1943 ao1990 colb1945}

    # Parse into a local hash; it is published to @@registry at the very end.
    registry = {}
    entry    = {}

    # Finishes the record collected in "entry" and starts a fresh one.
    process = lambda do
      type = entry[:type]

      if shorten_description_for.include? entry[:subtag]
        entry[:description].gsub!(%r{ +\(.*?\)}, '')
      end

      if type
        registry[type] ||= []
        registry[type] << entry
      end

      entry = {}
    end

    # Field that a following continuation line (leading spaces) extends.
    current_sym = nil

    Mtx::OnlineFile.download("https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry").
      split(%r{\n+}).
      map(&:chomp).
      each do |line|
        if line == '%%'
          # "%%" separates records.
          process.call
          current_sym = nil

        elsif %r{^Prefix: *(.+)}i.match(line)
          entry[:prefix] ||= []
          entry[:prefix] << $1
          # Continuation lines are not appended to prefixes.
          current_sym = nil

        elsif %r{^(.*?): *(.+)}i.match(line)
          key, value        = $1, $2
          current_sym        = key.downcase.gsub(%r{-}, '_').to_sym
          entry[current_sym] = value

        elsif %r{^ +(.+)}.match(line) && current_sym
          entry[current_sym] += " #{$1}"
        end
      end

    # The last record is not followed by a "%%" line.
    process.call

    @@registry = registry
  }

  return @@registry
end
# Downloads the ISDCF language list and returns the value stored under the
# top-level "data" key of the JSON document.
def self.fetch_isdcf_languages
  raw_json = Mtx::OnlineFile.download("https://registry.isdcf.com/languages", "isdcf_languages")
  document = JSON.parse(raw_json)
  document["data"]
end
# Builds the table cells for one extlang/variant initializer row:
# lower-cased subtag, description, NULL-terminated prefix list and the
# deprecation flag ("true" when the entry carries a :deprecated key).
def self.format_one_extlang_variant entry
  prefixes = entry[:prefix]

  prefix_cell =
    if prefixes
      quoted = prefixes.sort.map { |prefix| prefix.to_c_string }
      '{ ' + quoted.join(', ') + ', NULL }'
    else
      '{ NULL }'
    end

  code_cell        = entry[:subtag].downcase.to_c_string
  description_cell = entry[:description].to_u8_c_string
  deprecated_cell  = entry.key?(:deprecated).to_s

  [ code_cell, description_cell, prefix_cell, deprecated_cell ]
end
# Formats all registry entries of the given type as initializer rows.
# Returns a two-element array: the number of entries and the rows joined
# with newlines. The name parameter is accepted but not used.
def self.format_extlangs_variants entries, type, name
  selected    = entries[type]
  sorted_rows = selected.map { |entry| self.format_one_extlang_variant entry }.sort
  lines       = format_table(sorted_rows, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },")

  [ selected.size, lines.join("\n") ]
end
# Builds the table cells for one grandfathered tag. Such rows never have
# prefixes and are always emitted with the deprecation flag "true".
def self.format_one_grandfathered entry
  tag_cell         = entry[:tag].to_c_string
  description_cell = entry[:description].to_u8_c_string

  [ tag_cell, description_cell, '{ NULL }', 'true' ]
end
# Formats every "grandfathered" registry entry as an initializer row.
# Returns a two-element array: the number of entries and the rows joined
# with newlines.
def self.format_grandfathered entries
  grandfathered = entries["grandfathered"]
  sorted_rows   = grandfathered.map { |entry| self.format_one_grandfathered entry }.sort
  lines         = format_table(sorted_rows, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },")

  [ grandfathered.size, lines.join("\n") ]
end
# Classifies the source side of a preferred-value mapping: anything that
# contains a hyphen is a full tag, otherwise the registry type is used as a
# symbol.
def self.preferred_value_type_original type, pv
  return :tag if %r{-} =~ pv

  type.to_sym
end
# Classifies the target side of a preferred-value mapping: values that
# contain a hyphen or consist of two or three lower-case letters are full
# tags, otherwise the registry type is used as a symbol.
def self.preferred_value_type_target type, pv
  return :tag if %r{-|^[a-z]{2,3}$} =~ pv

  type.to_sym
end
# Builds the table cells for one side of a preferred-value initializer,
# i.e. the three members tag, region and variant wrapped in braces.
#
# pv_type - one of :tag, :language, :region or :variant; selects the member
#           that receives the value (:tag and :language both fill "tag").
# pv      - the subtag/tag string; emitted via String#to_c_string.
#
# Returns [ "{ <tag>", <region>, <variant>, "}" ] where members that are not
# selected are the literal "NULL".
# Raises RuntimeError for any other pv_type. The earlier guard tested the
# three cells for falsiness, but they are always non-nil strings, so it
# could never fire and an unknown type silently produced an all-NULL row.
def self.format_one_preferred_value_construction pv_type, pv
  known_types = [:tag, :language, :region, :variant]
  fail "unknown pv_type #{pv_type}" unless known_types.include?(pv_type)

  pv_str  = pv.to_c_string
  tag     = [:tag, :language].include?(pv_type) ? pv_str : "NULL"
  region  = :region  == pv_type                ? pv_str : "NULL"
  variant = :variant == pv_type                ? pv_str : "NULL"

  [ "{ #{tag}", region, variant, "}" ]
end
# Builds the table cells for the target side of a preferred-value mapping.
#
# type - the registry record type (e.g. "region").
# pv   - the preferred value string.
#
# The kind of value is determined with preferred_value_type_target and the
# cells are produced by format_one_preferred_value_construction, so both
# code paths format rows the same way. (This method used to call a
# non-existent preferred_value_type and raised NoMethodError when invoked.)
def self.format_one_preferred_value_target type, pv
  pv_type = self.preferred_value_type_target type, pv

  self.format_one_preferred_value_construction pv_type, pv
end
# Builds the table cells for one registry preferred-value mapping: the
# cells for the original value followed by the cells for the preferred
# value.
def self.format_one_preferred_value entry
  type      = entry[:type]
  original  = entry[:original_value]
  preferred = entry[:preferred_value]

  from_cells = self.format_one_preferred_value_construction(self.preferred_value_type_original(type, original), original)
  to_cells   = self.format_one_preferred_value_construction(self.preferred_value_type_target(type, preferred), preferred)

  from_cells + to_cells
end
# Builds the table cells for one ISDCF mapping; both the "dcncTag" and the
# "rfc5646Tag" value are emitted as full tags.
def self.format_one_preferred_value_isdcf entry
  from_cells, to_cells = %w{dcncTag rfc5646Tag}.map do |key|
    self.format_one_preferred_value_construction(:tag, entry[key])
  end

  from_cells + to_cells
end
# Formats all preferred-value mappings as initializer rows.
#
# Registry entries that have a :preferred_value come first. Each of them
# gets an :original_value key stored on the entry itself (first prefix plus
# "-" when prefixes exist, followed by the subtag or tag). They are ordered
# by descending number of hyphens in that value, then by the lower-cased
# value. ISDCF entries whose "dcncTag" matches Q[A-T][A-Z] and whose
# "rfc5646Tag" is not blank are appended afterwards in their given order.
#
# Returns a two-element array: the number of rows and the rows joined with
# newlines.
def self.format_preferred_values entries, isdcf_entries
  with_preferred_value = entries.values.flat_map do |list|
    list.select { |entry| entry.key?(:preferred_value) }
  end

  with_preferred_value.each do |entry|
    leading                = entry.key?(:prefix) ? entry[:prefix].first + "-" : ""
    entry[:original_value] = leading + (entry[:subtag] || entry[:tag])
  end

  ordered = with_preferred_value.sort_by do |entry|
    original = entry[:original_value]
    [ 10 - original.count("-"), original.downcase ]
  end

  registry_rows = ordered.map { |entry| self.format_one_preferred_value entry }

  isdcf_rows = isdcf_entries.
    select { |entry| %r{^Q[A-T][A-Z]$}.match(entry["dcncTag"] || "") && !entry["rfc5646Tag"].blank? }.
    map    { |entry| self.format_one_preferred_value_isdcf entry }

  rows  = registry_rows + isdcf_rows
  lines = format_table(rows, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },")

  [ rows.size, lines.join("\n") ]
end
# Formats the (tag or subtag, suppress-script) pairs of all "language" and
# "extlang" entries that have a non-blank :suppress_script. Pairs are sorted
# and de-duplicated before formatting.
#
# Returns a two-element array: the number of rows and the rows joined with
# newlines.
def self.format_suppress_scripts entries
  candidates = entries["language"] + entries["extlang"]

  pairs = candidates.
    reject { |entry| entry[:suppress_script].blank? }.
    map    { |entry| [ entry[:tag] || entry[:subtag], entry[:suppress_script] ] }

  rows  = pairs.sort.uniq.map { |pair| pair.map { |text| text.to_c_string } }
  lines = format_table(rows, :column_suffix => ",", :row_prefix => " { ", :row_suffix => " },")

  [ rows.size, lines.join("\n") ]
end
# Returns the largest number of prefixes carried by any "extlang" or
# "variant" entry. The template adds one to this value to size the
# NULL-terminated prefixes array.
#
# A type that is missing from entries is treated as having no entries, and
# 0 is returned when there are no entries at all, so the caller's "+ 1"
# never operates on nil.
def self.calculate_max_num_prefixes entries
  %w{extlang variant}.
    flat_map { |type| entries[type] || [] }.
    map      { |entry| (entry[:prefix] || []).length }.
    max || 0
end
# Renders the C++ template with data derived from the registry and the
# ISDCF list and writes the result to
# src/common/iana_language_subtag_registry_list.cpp below $source_dir.
def self.do_create_cpp entries, isdcf_entries
  cpp_file_name = "src/common/iana_language_subtag_registry_list.cpp"

  # The template reads this local through binding; its name must stay
  # "content_of".
  content_of = { :max_num_prefixes => self.calculate_max_num_prefixes(entries) }

  # Each formatter returns [count, formatted rows]; they are stored as
  # :num_<section> and :<section>_init.
  sections = [
    [ :extlangs,         lambda { self.format_extlangs_variants(entries, "extlang", "extlangs") } ],
    [ :variants,         lambda { self.format_extlangs_variants(entries, "variant", "variants") } ],
    [ :suppress_scripts, lambda { self.format_suppress_scripts(entries) } ],
    [ :grandfathered,    lambda { self.format_grandfathered(entries) } ],
    [ :preferred_values, lambda { self.format_preferred_values(entries, isdcf_entries) } ],
  ]

  sections.each do |section, formatter|
    count, rows = formatter.call

    content_of[:"num_#{section}"]  = count
    content_of[:"#{section}_init"] = rows
  end

  content = ERB.new(@@list_cpp_content).result(binding)

  runq("write", cpp_file_name) do
    IO.write("#{$source_dir}/#{cpp_file_name}", content)
    0
  end
end
# Fetches the IANA registry and the ISDCF language list (in that order) and
# generates the C++ source file from them.
def self.create_cpp
  registry        = self.fetch_registry
  isdcf_languages = self.fetch_isdcf_languages

  do_create_cpp(registry, isdcf_languages)
end
end