2020-06-28 13:45:18 +00:00
|
|
|
|
# Regenerates "src/common/iso3166_country_list.cpp", the C++ table of
# ISO 3166 countries and UN M.49 regions.
#
# The table is assembled from several sources, in this order of precedence:
#
# 1. Wikipedia's "List of ISO 3166 country codes" (names, official names,
#    alpha-2/alpha-3 codes, numeric codes),
# 2. the UN M.49 overview table (regions plus any country or area not
#    already known),
# 3. a fixed list of user-assigned alpha-2 codes,
# 4. the "region" entries of the IANA language subtag registry (fills the
#    remaining gaps and supplies the deprecation flag).
#
# Entries are collected in a hash keyed by both the alpha-2 code (String)
# and the numeric code (Integer); a country reachable under both keys is
# the same Hash object, so `values.uniq` yields it only once.
def create_iso3166_country_list_file
  countries_regions = {}

  iso3166_content = Mtx::OnlineFile.download("https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes")

  parse_html_extract_table_data(iso3166_content, %r{^.*?<table[^>]*>}i).
    drop(2).                            # skip the first two rows (presumably the table headers)
    reject { |row| row.length < 8 }.    # only rows with all eight columns are usable
    each do |row|

      # Strip the remaining markup from each cell & normalize white space.
      row = row.map do |column|
        column.
          gsub(%r{<style>.*?</style>}, '').
          gsub(%r{<a><img></a>}, ', ').
          gsub(%r{<img>}, '').
          gsub(%r{>_<}, '><').
          gsub(%r{<[^>]+>}, '').
          # NOTE(review): written as an explicit escape because the literal
          # character is indistinguishable from a plain space; removing
          # plain spaces would defeat the following steps, so this is
          # presumably a non-breaking space -- confirm.
          gsub(%r{\u00A0}, '').
          gsub(%r{^, +}, '').
          gsub(%r{[[:space:]]+}, ' ').
          gsub(%r{^ +| +$}, '')
      end

      # Column layout of a row:
      # 0 ["Albania",
      # 1  "The Republic of Albania",
      # 2  "UN member state ",
      # 3  "AL",
      # 4  "ALB",
      # 5  "008",
      # 6  "ISO 3166-2:AL",
      # 7  ".al"],

      m49_code      = row[5].to_i
      name          = row[0].
        gsub(%r{ *\[.*?\]}, '').                  # bracketed annotations
        gsub(%r{ *\(the?\)$}i, '').               # trailing "(the)"
        gsub(%r{(,.+?),([^,]+)$}, '\1 and\2')     # last comma of a list → "and"
      official_name = row[1].
        gsub(%r{ *\[.*?\]}, '').
        gsub(%r{ *\(.*?\)$}i, '').
        gsub(%r{^The +}, '')

      entry = {
        :number        => m49_code,
        :alpha_2_code  => row[3],
        :alpha_3_code  => row[4],
        :name          => name,
        # Only keep the official name if it adds information.
        :official_name => name == official_name ? "" : official_name,
      }

      # Same object under both keys; see the method description.
      countries_regions[row[3]]   = entry
      countries_regions[m49_code] = entry
    end

  # pp(countries_regions); exit 42

  m49_content = Mtx::OnlineFile.download("https://unstats.un.org/unsd/methodology/m49/overview/", "m49_list.txt")
  m49_data    = parse_html_extract_table_data(m49_content, %r{^.*?<table[^>]+downloadTableEN[^>]*>}i)

  # The first row holds the column titles. Map column index → normalized
  # title, e.g. "Sub-region Name" → "sub_region_name".
  headers = m49_data.
    shift.
    each_with_index.
    map { |text, idx| [ idx, text.downcase.gsub(%r{[^a-z0-9]+}, '_').gsub(%r{^_|_$}, '') ] }.
    to_h

  # Registers the region named by the "<type>_code"/"<type>_name" columns
  # of an M.49 row unless it is unnamed or its code is already known.
  maybe_add = lambda do |row, type|
    code = row["#{type}_code"].to_i
    name = row["#{type}_name"]

    return if name.blank? || countries_regions[code]

    countries_regions[code] = {
      :number        => code,
      :alpha_2_code  => "",
      :alpha_3_code  => "",
      :name          => name,
      :official_name => "",
    }
  end

  m49_data.
    map { |row| row.each_with_index.map { |text, idx| [ headers[idx], text ] }.to_h }.
    each do |row|

      %w{global region sub_region intermediate_region}.each { |type| maybe_add.call(row, type) }

      code  = row["m49_code"].to_i
      entry = {
        :number        => code,
        :alpha_2_code  => row["iso_alpha2_code"],
        :alpha_3_code  => row["iso_alpha3_code"],
        :name          => row["country_or_area"],
        :official_name => "",
      }

      # "||=": data gathered earlier takes precedence over the M.49 table.
      # NOTE(review): a row without an alpha-2 code is stored under that
      # blank key as well; only the first such row is. This looks harmless
      # as that key is never looked up later -- confirm.
      countries_regions[code]                   ||= entry
      countries_regions[row["iso_alpha2_code"]] ||= entry
    end

  user_assigned = [ 'AA', 'ZZ' ] \
    + ('M'..'Z').map { |letter| "Q#{letter}" } \
    + ('A'..'Z').map { |letter| "X#{letter}" }

  # These unconditionally replace whatever was stored for the code before.
  user_assigned.each do |code|
    countries_regions[code] = {
      :number        => 0,
      :alpha_2_code  => code,
      :alpha_3_code  => "",
      :name          => "User-assigned",
      :official_name => "",
    }
  end

  Mtx::IANALanguageSubtagRegistry.
    fetch_registry["region"].
    reject { |entry| %r{\.\.}.match(entry[:subtag]) }.    # skip subtags containing ".."
    each do |entry|
      if %r{^[0-9]+$}.match(entry[:subtag])
        # String#to_i is decimal & ignores leading zeros: "008" → 8.
        number = entry[:subtag].to_i
        code   = ""
        idx    = number
      else
        number = 0
        code   = entry[:subtag]
        idx    = code
      end

      unless countries_regions.key?(idx)
        countries_regions[idx] = {
          :number        => number,
          :alpha_2_code  => code,
          :alpha_3_code  => "",
          :name          => entry[:description],
          :official_name => "",
        }
      end

      countries_regions[idx][:deprecated] = entry.key?(:deprecated)
    end

  rows = countries_regions.
    values.
    uniq.
    sort_by { |entry| [ entry[:alpha_2_code], entry[:alpha_3_code], entry[:number] ] }.
    map do |entry|
      [ entry[:alpha_2_code].upcase.to_c_string,
        entry[:alpha_3_code].upcase.to_c_string,
        sprintf('%3d', entry[:number]),
        entry[:name].to_u8_c_string,
        entry[:official_name].to_u8_c_string,
        # Entries never seen in the IANA registry carry no flag at all.
        (entry[:deprecated] || false).to_s,
      ]
    end

  # NOTE(review): the template lines in both heredocs are kept flush left;
  # white space is not significant in the generated C++.
  header = <<EOT
/*
mkvmerge -- utility for splicing together matroska files
from component media subtypes

Distributed under the GPL v2
see the file COPYING for details
or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html

ISO 3166 countries & UN M.49 regions

Written by Moritz Bunkus <moritz@bunkus.org>.
*/

// ------------------------------------------------------------------------
// NOTE: this file is auto-generated by the "dev:iso3166_list" rake target.
// ------------------------------------------------------------------------

#include "common/common_pch.h"

#include "common/iso3166.h"

namespace mtx::iso3166 {

std::vector<region_t> g_regions;

struct region_init_t {
char const *alpha_2_code, *alpha_3_code;
unsigned int number;
char const *name, *official_name;
bool is_deprecated;
};

static region_init_t const s_regions_init[] = {
EOT

  footer = <<EOT
};

void
init() {
g_regions.reserve(#{rows.size});

for (region_init_t const *region = s_regions_init, *end = region + #{rows.size}; region < end; ++region)
g_regions.emplace_back(region->alpha_2_code, region->alpha_3_code, region->number, region->name, region->official_name, region->is_deprecated);
}

} // namespace mtx::iso3166
EOT

  # Final order of the table: by the formatted alpha-2 code, alpha-3 code
  # & name columns.
  rows          = rows.sort_by { |row| [ row[0], row[1], row[3] ].join('::') }
  content       = header + format_table(rows, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },").join("\n") + "\n" + footer
  cpp_file_name = "src/common/iso3166_country_list.cpp"

  runq("write", cpp_file_name) { IO.write("#{$source_dir}/#{cpp_file_name}", content); 0 }
end
|