# Builds the generated C++ source "src/common/iso3166_country_list.cpp", which
# holds the table of ISO 3166 countries and UN M.49 regions, and writes it below
# $source_dir. Takes no arguments.
#
# All entries are collected in one hash (countries_regions) that is keyed both
# by alpha-2 code (String) and by numeric code (Integer); both keys of a country
# point at the same entry hash, which is why the values are passed through
# `uniq` before the rows are emitted.
#
# Processing order (later steps only fill gaps unless stated otherwise):
#   1. Wikipedia "List of ISO 3166 country codes": the page is downloaded, rows
#      are extracted with parse_html_extract_table_data, the first two rows and
#      rows with fewer than eight columns are dropped, and every cell is cleaned
#      by a chain of gsub calls. Column 0 is the name, 1 the official name,
#      3 the alpha-2 code, 4 the alpha-3 code and 5 the numeric code. Bracketed
#      annotations are removed from both names; :official_name is stored as ""
#      when it equals the cleaned-up name.
#   2. UN M.49 overview page: the first table row is converted into header keys
#      (lower-cased, non-alphanumeric runs replaced by "_", leading/trailing "_"
#      trimmed). For each data row the lambda maybe_add registers the global,
#      region, sub_region and intermediate_region codes if they have a name and
#      are not known yet; the country itself is added with `||=`, so entries
#      from step 1 are kept.
#   3. User-assigned alpha-2 codes "AA", "ZZ", "QM".."QZ" and "XA".."XZ" are
#      assigned unconditionally with number 0 and the name "User-assigned".
#   4. "region" records from Mtx::IANALanguageSubtagRegistry.fetch_registry:
#      subtags containing ".." are skipped; all-digit subtags are keyed by their
#      integer value, all others by the subtag itself. Missing entries are
#      created, and :deprecated is set on every matched entry according to
#      whether the registry record has a :deprecated key.
#   5. The unique entries are sorted, converted to C string literals and laid
#      out with format_table between the `header` and `footer` texts; the result
#      is written with IO.write inside a runq("write", ...) block.
#
# NOTE(review): `blank?`, `to_c_string` and `to_u8_c_string` are not core Ruby
# methods; presumably they are provided elsewhere in the project -- confirm.
# NOTE(review): this copy looks damaged -- three gsub calls use an empty `%r{}`
# pattern, the table-anchor regexes start with `]`, the `header = <` and
# `footer = <` heredoc openers are incomplete and `std::vector g_regions` lacks
# a template argument. It appears that text between angle brackets was lost;
# verify against version control before relying on this text.
# NOTE(review): the statements are collapsed onto three physical lines here, so
# the inline `#` notes in the first line are followed by code on the same line;
# restore the original line breaks before executing -- confirm against upstream.
def create_iso3166_country_list_file countries_regions = {} iso3166_content = Mtx::OnlineFile.download("https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes") parse_html_extract_table_data(iso3166_content, %r{^.*?]*>}i). drop(2). reject { |row| row.length < 8 }. each do |row| row = row.map do |column| column. gsub(%r{}, ''). gsub(%r{}, ', '). gsub(%r{}, ''). gsub(%r{>_<}, '><'). gsub(%r{<[^>]+>}, ''). gsub(%r{ }, ''). gsub(%r{^, +}, ''). gsub(%r{[[:space:]]+}, ' '). gsub(%r{^ +| +$}, '') end # 0 ["Albania", # 1 "The Republic of Albania", # 2 "UN member state ", # 3 "AL", # 4 "ALB", # 5 "008", # 6 "ISO 3166-2:AL", # 7 ".al"], m49_code = row[5].to_i name = row[0]. gsub(%r{ *\[.*?\]}, ''). gsub(%r{ *\(the?\)$}i, ''). gsub(%r{(,.+?),([^,]+)$}, '\1 and\2') official_name = row[1]. gsub(%r{ *\[.*?\]}, ''). gsub(%r{ *\(.*?\)$}i, ''). gsub(%r{^The +}, '') entry = { :number => m49_code, :alpha_2_code => row[3], :alpha_3_code => row[4], :name => name, :official_name => name == official_name ? "" : official_name, } countries_regions[row[3]] = entry countries_regions[m49_code] = entry end # pp(countries_regions); exit 42 m49_content = Mtx::OnlineFile.download("https://unstats.un.org/unsd/methodology/m49/overview/", "m49_list.txt") m49_data = parse_html_extract_table_data(m49_content, %r{^.*?]+downloadTableEN[^>]*>}i) headers = Hash[ * m49_data. shift. each_with_index. map { |text, idx| [ idx, text.downcase.gsub(%r{[^a-z0-9]+}, '_').gsub(%r{^_|_$}, '') ] }. flatten ] maybe_add = lambda do|row, type| code = row["#{type}_code"].to_i name = row["#{type}_name"] return if name.blank? || countries_regions[code] countries_regions[code] = { :number => code, :alpha_2_code => "", :alpha_3_code => "", :name => name, :official_name => "", } end m49_data. map do |row| Hash[ * row. each_with_index. map { |text, idx| [ headers[idx], text ] }. flatten ] end. 
each do |row| %w{global region sub_region intermediate_region}.each { |type| maybe_add.call(row, type) } code = row["m49_code"].to_i entry = { :number => code, :alpha_2_code => row["iso_alpha2_code"], :alpha_3_code => row["iso_alpha3_code"], :name => row["country_or_area"], :official_name => "", } countries_regions[code] ||= entry countries_regions[row["iso_alpha2_code"]] ||= entry end user_assigned = [ 'AA', 'ZZ' ] \ + ('M'..'Z').map { |letter| "Q#{letter}" } \ + ('A'..'Z').map { |letter| "X#{letter}" } user_assigned.each do |code| countries_regions[code] = { :number => 0, :alpha_2_code => code, :alpha_3_code => "", :name => "User-assigned", :official_name => "", } end Mtx::IANALanguageSubtagRegistry. fetch_registry["region"]. reject { |entry| %r{\.\.}.match(entry[:subtag]) }. each do |entry| if %r{^[0-9]+$}.match(entry[:subtag]) number = entry[:subtag].gsub(%r{^0+}, '').to_i code = "" idx = number else number = 0 code = entry[:subtag] idx = code end if !countries_regions.key?(idx) countries_regions[idx] = { :number => number, :alpha_2_code => code, :alpha_3_code => "", :name => entry[:description], :official_name => "", } end countries_regions[idx][:deprecated] = entry.key?(:deprecated) end rows = countries_regions. values. uniq. sort_by { |entry| [ entry[:alpha_2_code], entry[:alpha_3_code], entry[:number] ] }. map do |entry| [ entry[:alpha_2_code].upcase.to_c_string, entry[:alpha_3_code].upcase.to_c_string, sprintf('%3d', entry[:number]), entry[:name].to_u8_c_string, entry[:official_name].to_u8_c_string, (entry[:deprecated] || false).to_s, ] end header = <. */ // ------------------------------------------------------------------------ // NOTE: this file is auto-generated by the "dev:iso3166_list" rake target. 
// ------------------------------------------------------------------------ #include "common/common_pch.h" #include "common/iso3166.h" namespace mtx::iso3166 { std::vector g_regions; struct region_init_t { char const *alpha_2_code, *alpha_3_code; unsigned int number; char const *name, *official_name; bool is_deprecated; }; static region_init_t const s_regions_init[] = { EOT footer = <alpha_2_code, region->alpha_3_code, region->number, region->name, region->official_name, region->is_deprecated); } } // namespace mtx::iso3166 EOT rows = rows.sort_by { |row| [ row[0], row[1], row[3] ].join('::') } content = header + format_table(rows, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },").join("\n") + "\n" + footer cpp_file_name = "src/common/iso3166_country_list.cpp" runq("write", cpp_file_name) { IO.write("#{$source_dir}/#{cpp_file_name}", content); 0 } end