From 19e61a2d2eba97a62e40c50406fbefea7f1b4db4 Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Sun, 6 Sep 2020 19:51:58 +0200 Subject: [PATCH] IANA language subtag registry: make registry download & parsing reusable Part of the implementation of #2919. --- Rakefile | 2 +- rake.d/iana_language_subtag_registry.rb | 132 +++++++++++++----------- 2 files changed, 71 insertions(+), 63 deletions(-) diff --git a/Rakefile b/Rakefile index 39d036f7f..9c58da949 100644 --- a/Rakefile +++ b/Rakefile @@ -870,7 +870,7 @@ namespace :dev do desc "Create iana_language_subtag_registry_list.cpp from official list" task :iana_language_subtag_registry_list do - create_iana_language_subtag_registry_list_file + Mtx::IANALanguageSubtagRegistry.create_cpp end end diff --git a/rake.d/iana_language_subtag_registry.rb b/rake.d/iana_language_subtag_registry.rb index 9214eeb55..4edd0c795 100644 --- a/rake.d/iana_language_subtag_registry.rb +++ b/rake.d/iana_language_subtag_registry.rb @@ -1,69 +1,77 @@ -def create_iana_language_subtag_registry_list_file - txt_file = "language-subtag-registry.txt" - url = "https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry" - cpp_file_name = "src/common/iana_language_subtag_registry_list.cpp" +module Mtx::IANALanguageSubtagRegistry + def self.with_registry &func + url = "https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry" + registry = "language-subtag-registry" + download_registry = !FileTest.exists?(registry) - File.unlink(txt_file) if FileTest.exists?(txt_file) + runq "wget", url, "wget --quiet -O #{registry} #{url}" if download_registry - runq "wget", url, "wget --quiet -O #{txt_file} #{url}" + entry = {} + entries = {} + process = lambda do + type = entry.delete(:type) - entry = {} - entries = {} - process = lambda do - type = entry.delete(:type) + if type + entries[type] ||= [] + entries[type] << entry + end - if type - entries[type] ||= [] - entries[type] << entry + entry = {} end - entry = {} + IO.readlines(registry). + map(&:chomp). + each do |line| + + if line == '%%' + process.call + + elsif %r{^(Type|Subtag|Description): *(.+)}i.match(line) + entry[$1.downcase.to_sym] = $2 + + elsif %r{^Prefix: *(.+)}i.match(line) + entry[:prefix] ||= [] + entry[:prefix] << $1 + end + end + + process.call + + func.call(entries) + + ensure + File.unlink(registry) if download_registry && FileTest.exists?(registry) end - IO.readlines(txt_file). - map(&:chomp). - each do |line| + def self.do_create_cpp entries + cpp_file_name = "src/common/iana_language_subtag_registry_list.cpp" + entry_formatter = lambda do |entry| + if entry[:prefix] + prefix = '{ ' + entry[:prefix].sort.map(&:to_cpp_string).join(', ') + ' }' + else + prefix = '{}' + end - if line == '%%' - process.call - - elsif %r{^(Type|Subtag|Description): *(.+)}i.match(line) - entry[$1.downcase.to_sym] = $2 - - elsif %r{^Prefix: *(.+)}i.match(line) - entry[:prefix] ||= [] - entry[:prefix] << $1 - end - end - - process.call - - formatter = lambda do |entry| - if entry[:prefix] - prefix = '{ ' + entry[:prefix].sort.map(&:to_cpp_string).join(', ') + ' }' - else - prefix = '{}' + [ entry[:subtag].downcase.to_cpp_string, + entry[:description].to_u8_cpp_string, + prefix, + ] end - [ entry[:subtag].downcase.to_cpp_string, - entry[:description].to_u8_cpp_string, - prefix, + formatter = lambda do |type, name| + rows = entries[type].map(&entry_formatter) + + "std::vector const g_#{name}{\n" + + format_table(rows.sort, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },").join("\n") + + "\n};\n" + end + + formatted = [ + formatter.call("extlang", "extlangs"), + formatter.call("variant", "variants"), ] - end - rows = entries["extlang"].map(&formatter) - - extlangs = "std::vector const g_extlangs{\n" + - format_table(rows.sort, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },").join("\n") + - "\n};\n" - - rows = entries["variant"].map(&formatter) - - variants = "std::vector const g_variants{\n" + - format_table(rows.sort, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },").join("\n") + - "\n};\n" - - header = <