library(tidyverse)
library(readxl)
library(rnaturalearth)
# Read your Excel file
<- read_excel("c-erps-2cols-v2.xlsx", sheet = "2024n")
your_data
# Get the country data from rnaturalearth
<- ne_countries(scale = "medium", returnclass = "sf")
world_data
# Function to find the best match
<- function(country, reference_list) {
find_best_match <- adist(country, reference_list)
distances <- reference_list[which.min(distances)]
best_match return(best_match)
}
# Apply the function to your data
<- your_data %>%
your_data mutate(matched_country = map_chr(country, ~find_best_match(.x, world_data$name)))
# View the matches
view(your_data %>% select(country, matched_country))
# Create a manual correction list for the Jersey mismatch
<- tibble(
manual_corrections original = c("Jersey (States of)", "Yemen, Republic"),
corrected = c("Jersey", "Yemen")
)
# Apply manual corrections
<- your_data %>%
your_data left_join(manual_corrections, by = c("country" = "original")) %>%
mutate(final_country = coalesce(corrected, matched_country)) %>%
select(-corrected, -matched_country)
# Join with rnaturalearth data
<- your_data %>%
joined_data left_join(world_data, by = c("final_country" = "name"))
# Function to find unmatched countries
<- function(matched_data, reference_data) {
find_unmatched_countries <- setdiff(reference_data$name, matched_data$final_country)
unmatched return(unmatched)
}
# Get the list of unmatched countries
<- find_unmatched_countries(your_data, world_data)
unmatched_countries
# Print the unmatched countries
cat("Unmatched countries:\n")
Unmatched countries:
print(unmatched_countries)
[1] "Afghanistan" "Anguilla"
[3] "Aland" "American Samoa"
[5] "Antarctica" "Ashmore and Cartier Is."
[7] "Fr. S. Antarctic Lands" "Antigua and Barb."
[9] "Burundi" "St-Barthélemy"
[11] "Bhutan" "Central African Rep."
[13] "Congo" "Comoros"
[15] "N. Cyprus" "Djibouti"
[17] "Dominica" "Eritrea"
[19] "Falkland Is." "Faeroe Is."
[21] "Micronesia" "Eq. Guinea"
[23] "Grenada" "Greenland"
[25] "Guam" "Heard I. and McDonald Is."
[27] "Indian Ocean Ter." "Br. Indian Ocean Ter."
[29] "Siachen Glacier" "Kiribati"
[31] "St. Kitts and Nevis" "Kosovo"
[33] "Lao PDR" "Saint Lucia"
[35] "Lesotho" "St-Martin"
[37] "Monaco" "Marshall Is."
[39] "N. Mariana Is." "Mauritania"
[41] "New Caledonia" "Norfolk Island"
[43] "Niue" "Nepal"
[45] "Nauru" "Pitcairn Is."
[47] "Palau" "Puerto Rico"
[49] "Dem. Rep. Korea" "Palestine"
[51] "Fr. Polynesia" "W. Sahara"
[53] "S. Sudan" "S. Geo. and S. Sandw. Is."
[55] "Saint Helena" "San Marino"
[57] "Somaliland" "St. Pierre and Miquelon"
[59] "São Tomé and Principe" "Seychelles"
[61] "Chad" "Turkmenistan"
[63] "Timor-Leste" "Tonga"
[65] "Vatican" "British Virgin Is."
[67] "U.S. Virgin Is." "Vanuatu"
[69] "Wallis and Futuna Is." "Samoa"
# You can also save this to a file if needed
# write_lines(unmatched_countries, "unmatched_countries.txt")