#!/usr/bin/perl -w

# -- How to get latest ISO-3166 country code data set --
#
# 1. Browse w/ Firefox to 'https://www.iso.org/obp/ui/#search/code/'
# 2. Select menu: "Tools" --> "Browser Tools" --> "Web Developer Tools", developer tools window will open at bottom of page, select "Network" tab
# 3. On page, top right, select "Results per page": 300
# 4. In developer tools window at bottom, right-click on last "POST" entry (Type "json", about 120 kByte)
# 5. Select "Copy" --> "Copy Response"
# 6. Paste into text file and save as 'iso-3166-1.json'
# 7. Run this script in the same directory to generate 'iso-3166-1.tab' and 'iso-3166-1_full.tab'
#

use strict;
use warnings;

# Base name shared by the JSON dump (input) and the two generated tables (output)
my $country_file = "iso-3166-1";
my $json_file = $country_file . ".json";
my $tab_full_file = $country_file . "_full.tab";
my $tab_short_file = $country_file . ".tab";

# Captions for the header row of the full table.
# Data rows are written in the order the values appear in each JSON data set,
# so this list is assumed to follow that same order -- verify against a fresh
# dump whenever the data source changes.
my @captions = (
	"Alpha-4 code",
	"Start date",
	"Code Type",
	"Alpha-2 code",
	"Notifiable",
	"English short name",
	"Code Short",
	"Title",
	"End date",
	"Uri",
	"Name Short",
	"French short name",
	"Numeric",
	"Urn",
	"Alpha-3 code",
	"Status Remark",
	"Remarks / New names (fr)",
	"Content Holder",
	"Remarks / New names (en)",
	"Status"
);

# Positions of the columns copied into the short table:
# 3 = "Alpha-2 code", 5 = "English short name"
my @short_columns = (3, 5);

# Open JSON dump
open my $json_fh, '<', $json_file
	or die "Can't open source file '${json_file}': $!\n";

# Create target files
open my $tab_full_fh, '>', $tab_full_file
	or die "Can't write target file '${tab_full_file}': $!\n";
open my $tab_short_fh, '>', $tab_short_file
	or die "Can't write target file '${tab_short_file}': $!\n";

# UTF-8 Byte Order Mark (BOM).
# No encoding layer is set on the handles, so it is emitted as three plain bytes.
my $utf8_bom = pack("CCC", 0xEF, 0xBB, 0xBF);
print {$tab_full_fh} $utf8_bom;
print {$tab_short_fh} $utf8_bom;

# Header row, tab separated
print {$tab_full_fh} join("\t", @captions) . "\n";
print {$tab_short_fh} join("\t", @captions[@short_columns]) . "\n";

# Parse JSON file
while (my $line = <$json_fh>) { # Note: will be one line only
	# Trim EOL, if any
	chomp $line;

	# Every country entry starts with '{"k":', so splitting there yields one
	# chunk per entry (plus whatever precedes the first entry).
	for my $entry (split /\{"k":/, $line) {
		# Assure correct format, extract the part between '"d":{' and '},"cs":{'
		next unless $entry =~ /.*,"d":\{(.*)\},"cs":\{/;

		# Keep the capture in a named variable; the matches below overwrite $1.
		my $data_set = $1;

		# Split up data set for one country and collect the clean values.
		# NOTE(review): items whose value is not a quoted string are skipped,
		# which would shift all following columns -- confirm the dump never
		# contains such items.
		my @row = ();
		for my $item (split /,"/, $data_set) {
			if ($item =~ /\d+":"(.*)"/) {
				push @row, $1;
			}
		}

		# Write row with data for country. A row with fewer values than
		# expected yields empty cells in the short table instead of
		# "uninitialized value" warnings.
		print {$tab_full_fh} join("\t", @row) . "\n";
		print {$tab_short_fh}
			join("\t", map { defined $_ ? $_ : "" } @row[@short_columns]) . "\n";
	}
}

# Done, clean up. Buffered write errors (e.g. disk full) only surface at
# close, so the target files are checked here.
close $tab_full_fh
	or die "Can't finish writing target file '${tab_full_file}': $!\n";
close $tab_short_fh
	or die "Can't finish writing target file '${tab_short_file}': $!\n";
close $json_fh;

