IANA language subtag registry: make registry download & parsing reusable

Part of the implementation of #2919.
This commit is contained in:
Moritz Bunkus 2020-09-06 19:51:58 +02:00
parent 83988c2eae
commit 19e61a2d2e
No known key found for this signature in database
GPG Key ID: 74AF00ADF2E32C85
2 changed files with 71 additions and 63 deletions

View File

@ -870,7 +870,7 @@ namespace :dev do
# Rake task that regenerates iana_language_subtag_registry_list.cpp.
# NOTE(review): two generator calls appear back to back in the task body: the
# top-level helper and Mtx::IANALanguageSubtagRegistry.create_cpp. This listing
# looks like a commit view, so one is presumably the line the other replaced;
# confirm which call should remain.
desc "Create iana_language_subtag_registry_list.cpp from official list"
task :iana_language_subtag_registry_list do
create_iana_language_subtag_registry_list_file
Mtx::IANALanguageSubtagRegistry.create_cpp
end
end

View File

@ -1,69 +1,77 @@
def create_iana_language_subtag_registry_list_file
txt_file = "language-subtag-registry.txt"
url = "https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry"
cpp_file_name = "src/common/iana_language_subtag_registry_list.cpp"
module Mtx::IANALanguageSubtagRegistry
# Provides the parsed IANA language subtag registry to the given block.
#
# The registry text is read from the file "language-subtag-registry" in the
# current directory. If that file does not exist it is fetched with wget
# first and deleted again when the method finishes; a file that was already
# present is left in place.
#
# Records are separated by "%%" lines. From each record the "Type", "Subtag"
# and "Description" fields (matched case-insensitively) are stored under the
# keys :type, :subtag and :description; a repeated field overwrites the
# earlier value. Every "Prefix" field is appended to the array stored under
# :prefix. Records without a "Type" field are dropped.
#
# The block is called once with a hash mapping each type string to the array
# of its records (the :type key is removed from the records themselves). The
# method returns the block's result.
def self.with_registry(&func)
  url               = "https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry"
  registry          = "language-subtag-registry"
  # File.exist? is used because the "exists?" spelling was removed in Ruby 3.2.
  download_registry = !File.exist?(registry)

  runq "wget", url, "wget --quiet -O #{registry} #{url}" if download_registry

  entry   = {}
  entries = {}

  # Files the record collected so far under its type and starts a fresh one.
  process = lambda do
    type = entry.delete(:type)

    if type
      entries[type] ||= []
      entries[type] << entry
    end

    entry = {}
  end

  IO.readlines(registry).
    map(&:chomp).
    each do |line|

    if line == '%%'
      process.call
    elsif %r{^(Type|Subtag|Description): *(.+)}i.match(line)
      entry[$1.downcase.to_sym] = $2
    elsif %r{^Prefix: *(.+)}i.match(line)
      entry[:prefix] ||= []
      entry[:prefix] << $1
    end
  end

  # Flush the record that is still being collected when the input ends.
  process.call

  func.call(entries)

ensure
  # Only remove the file if this call was the one that downloaded it.
  File.unlink(registry) if download_registry && File.exist?(registry)
end
IO.readlines(txt_file).
map(&:chomp).
each do |line|
def self.do_create_cpp entries
cpp_file_name = "src/common/iana_language_subtag_registry_list.cpp"
entry_formatter = lambda do |entry|
if entry[:prefix]
prefix = '{ ' + entry[:prefix].sort.map(&:to_cpp_string).join(', ') + ' }'
else
prefix = '{}'
end
if line == '%%'
process.call
elsif %r{^(Type|Subtag|Description): *(.+)}i.match(line)
entry[$1.downcase.to_sym] = $2
elsif %r{^Prefix: *(.+)}i.match(line)
entry[:prefix] ||= []
entry[:prefix] << $1
end
end
process.call
formatter = lambda do |entry|
if entry[:prefix]
prefix = '{ ' + entry[:prefix].sort.map(&:to_cpp_string).join(', ') + ' }'
else
prefix = '{}'
[ entry[:subtag].downcase.to_cpp_string,
entry[:description].to_u8_cpp_string,
prefix,
]
end
[ entry[:subtag].downcase.to_cpp_string,
entry[:description].to_u8_cpp_string,
prefix,
formatter = lambda do |type, name|
rows = entries[type].map(&entry_formatter)
"std::vector<entry_t> const g_#{name}{\n" +
format_table(rows.sort, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },").join("\n") +
"\n};\n"
end
formatted = [
formatter.call("extlang", "extlangs"),
formatter.call("variant", "variants"),
]
end
rows = entries["extlang"].map(&formatter)
extlangs = "std::vector<entry_t> const g_extlangs{\n" +
format_table(rows.sort, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },").join("\n") +
"\n};\n"
rows = entries["variant"].map(&formatter)
variants = "std::vector<entry_t> const g_variants{\n" +
format_table(rows.sort, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },").join("\n") +
"\n};\n"
header = <<EOT
header = <<EOT
/*
mkvmerge -- utility for splicing together matroska files
from component media subtypes
@ -89,19 +97,19 @@ namespace mtx::iana::language_subtag_registry {
EOT
footer = <<EOT
footer = <<EOT
} // namespace mtx::iana::language_subtag_registry
EOT
content = header +
extlangs +
"\n" +
variants +
footer
content = header +
formatted.join("\n") +
footer
runq("write", cpp_file_name) { IO.write("#{$source_dir}/#{cpp_file_name}", content); 0 }
runq("write", cpp_file_name) { IO.write("#{$source_dir}/#{cpp_file_name}", content); 0 }
end
ensure
File.unlink(txt_file) if FileTest.exists?(txt_file)
# Generates the C++ list file: obtains the parsed registry entries through
# with_registry and hands them to do_create_cpp.
def self.create_cpp
  with_registry do |registry_entries|
    do_create_cpp(registry_entries)
  end
end
end