# *** In a terminal ***
# Get the up-to-date source file.
# -O forces the download to overwrite any previous owid-covid-data.csv;
# without it wget keeps the old file and saves the new one with a ".1" suffix,
# so the commands below would silently work on stale data.
wget -O owid-covid-data.csv https://covid.ourworldindata.org/data/owid-covid-data.csv
# Put the header in a separate file (this creates/truncates 13oct)
head -n1 owid-covid-data.csv > 13oct
# Keep only lines referring to October 13, appended below the header.
# The pattern matches any line where ",2020-10-13" occurs after at least
# three commas (presumably the date is the 4th field -- confirm with the header).
grep ".*,.*,.*,2020-10-13" owid-covid-data.csv >> 13oct

# *** Now in R ***
# Get the restricted/completed dataset + the full dataset.
# In both data frames, column 3 is used below as the country name on which
# rows are matched and sorted.
d <- read.csv("https://auder.net/miage/seance3-4/data/owid-covid-data_EDIT_13-10-2020.csv")
full_d <- read.csv("13oct")
# Rename Ivory Coast in full_d (presumably "Ivory Coast" is the spelling used
# in d -- confirm). The comparison ignores case so that both "Cote d'ivoire"
# and "Cote d'Ivoire" are caught. which() yields an empty index when the name
# is absent, so nothing is renamed in that case; which.max() would return 1
# and silently rename the first row instead.
ic_index <- which(tolower(full_d[, 3]) == "cote d'ivoire")
full_d[ic_index, 3] <- "Ivory Coast"
# Some countries might be removed from the source (Russia, Afghanistan...):
# keep only the rows of d whose country is present in full_d
keep_idx <- d[, 3] %in% full_d[, 3]
d <- d[keep_idx, ]
# Reorder rows, in alphabetic (lexicographic) order of the country name
sorted_idx <- order(d[, 3])
d <- d[sorted_idx, ]
# Restrict full_d to the countries available in d
keep_idx <- full_d[, 3] %in% d[, 3]
full_d <- full_d[keep_idx, ]
# Also sort full_d rows, as in d
sorted_idx <- order(full_d[, 3])
full_d <- full_d[sorted_idx, ]

# Finally, add any new column like this. Beware missing values! (NAs)
num_col <- 32
# For each row of d, find the row of full_d with the same country name
# (column 3). Matching by name keeps the values aligned even if the two
# data frames do not have exactly the same rows; unmatched countries get NA.
idx <- match(d[, 3], full_d[, 3])
d <- cbind(d, full_d[idx, num_col])
# Give the new (last) column the name it has in full_d
names(d)[ncol(d)] <- names(full_d)[num_col]
# To restrict rows to non-missing values: a row is dropped when the sum of
# its columns 4..ncol(d) is NA (these columns must all be numeric).
# A logical mask is used rather than negative indices, because negating an
# empty index would drop every row when no value is missing.
has_na <- is.na(rowSums(d[, 4:ncol(d)]))
d <- d[!has_na, ]
# NOTE: removing a column is easy:
num_col <- 10
d <- d[, -num_col]
