mirror of
https://gitlab.com/mbunkus/mkvtoolnix.git
synced 2024-10-22 07:21:21 +00:00
ISO 639: generate language list directly from latest ISO lists
This commit is contained in:
parent
a351d7629f
commit
50505d3d1b
@ -1,31 +1,47 @@
|
||||
def create_iso639_language_list_file
|
||||
cpp_file_name = "src/common/iso639_language_list.cpp"
|
||||
iso639_2 = JSON.parse(IO.readlines("/usr/share/iso-codes/json/iso_639-2.json").join('')) \
|
||||
["639-2"].
|
||||
reject { |entry| %r{^qaa}.match(entry["alpha_3"]) }.
|
||||
map do |entry|
|
||||
entry["has_639_2"] = true
|
||||
entry["alpha_3_to_use"] = entry["bibliographic"] || entry["alpha_3"]
|
||||
entry
|
||||
end
|
||||
|
||||
used_codes = Hash[ *iso639_2.map { |entry| [ entry["alpha_3"], true, entry["bibliographic"], true ] }.flatten ]
|
||||
|
||||
JSON.parse(IO.readlines("/usr/share/iso-codes/json/iso_639-3.json").join('')) \
|
||||
["639-3"].
|
||||
reject { |entry| entry["type"] != "L" }.
|
||||
reject { |entry| used_codes.include?(entry["alpha_3"]) }.
|
||||
each do |entry|
|
||||
iso639_2 << {
|
||||
"name" => entry["name"],
|
||||
"alpha_3" => entry["alpha_3"],
|
||||
"alpha_3_to_use" => entry["alpha_3"],
|
||||
"has_639_2" => false,
|
||||
}
|
||||
end
|
||||
|
||||
rows = iso639_2.
|
||||
list_file = "iso-639-3"
|
||||
list_downloaded = false
|
||||
|
||||
if !FileTest.exists?(list_file)
|
||||
url = "https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3.tab"
|
||||
runq "wget", url, "wget --quiet -O #{list_file} #{url}"
|
||||
|
||||
list_downloaded = true
|
||||
end
|
||||
|
||||
lines = IO.readlines(list_file).map(&:chomp)
|
||||
headers = Hash[ *
|
||||
lines.
|
||||
shift.
|
||||
split(%r{\t}).
|
||||
map(&:downcase).
|
||||
each_with_index.
|
||||
map { |name, index| [ index, name ] }.
|
||||
flatten
|
||||
]
|
||||
|
||||
rows = lines.
|
||||
map do |line|
|
||||
parts = line.split(%r{\t})
|
||||
entry = Hash[ *
|
||||
(0..parts.size).
|
||||
map { |idx| [ headers[idx], !parts[idx] || parts[idx].empty? ? nil : parts[idx] ] }.
|
||||
flatten
|
||||
]
|
||||
|
||||
entry
|
||||
end.
|
||||
reject { |entry| !%r{^[CLS]$}.match(entry["language_type"]) }. # Constructed, Living & Special
|
||||
map do |entry|
|
||||
{
|
||||
"name" => entry["ref_name"],
|
||||
"bibliographic" => entry["part2b"] && (entry["part2b"] != entry["part2t"]) ? entry["part2b"] : nil,
|
||||
"alpha_2" => entry["part1"],
|
||||
"alpha_3" => entry["part2t"] || entry["id"],
|
||||
"alpha_3_to_use" => entry["part2b"] || entry["id"],
|
||||
"has_639_2" => !entry["part2b"].nil?,
|
||||
}
|
||||
end.map do |entry|
|
||||
[ entry["name"].to_u8_cpp_string,
|
||||
entry["alpha_3_to_use"].to_cpp_string,
|
||||
(entry["alpha_2"] || '').to_cpp_string,
|
||||
@ -81,7 +97,11 @@ EOT
|
||||
} // namespace mtx::iso639
|
||||
EOT
|
||||
|
||||
content = header + format_table(rows.sort, :column_suffix => ',', :row_prefix => " g_languages.emplace_back(", :row_suffix => ");").join("\n") + "\n" + footer
|
||||
content = header + format_table(rows.sort, :column_suffix => ',', :row_prefix => " g_languages.emplace_back(", :row_suffix => ");").join("\n") + "\n" + footer
|
||||
cpp_file_name = "src/common/iso639_language_list.cpp"
|
||||
|
||||
runq("write", cpp_file_name) { IO.write("#{$source_dir}/#{cpp_file_name}", content); 0 }
|
||||
|
||||
ensure
|
||||
File.unlink(list_file) if list_downloaded && FileTest.exists?(list_file)
|
||||
end
|
||||
|
Loading…
Reference in New Issue
Block a user