base_url = "https://www.imo.universite-paris-saclay.fr/~auder/teachings/analyse-de-donnees/data"
data = read.csv(paste0(base_url,"/table_Nobel.csv"))
# La première colonne contient les noms des pays = labels des lignes
row.names(data) = data[,1]
data = data[,-1]

GBIndex = (1:nrow(data)) [row.names(data) == "GB"] #4
ItalyIndex = (1:nrow(data)) [row.names(data) == "Italie"] #5
data2 = data #R fera une copie quand on modifiera data2
data2[GBIndex,] = data[ItalyIndex,] + rpois(ncol(data),3) - 3

library(FactoMineR)
res.ca = CA(data2, graph=FALSE)


options(repr.plot.width=15, repr.plot.height=10)
plot(res.ca)


data2[GBIndex,] = 2 * data[ItalyIndex,]
res.ca_split = CA(data2, graph=FALSE)
data2[ItalyIndex,] = 3 * data[ItalyIndex,]
data2[GBIndex,] = data2[nrow(data),] #USA
data2 = data2[-nrow(data),]
row.names(data2)[GBIndex] = "USA"
res.ca_fusion = CA(data2, graph=FALSE)


library(gridExtra)
library(ggplot2)
plot1 = plot(res.ca_split)
plot2 = plot(res.ca_fusion)
grid.arrange(plot1, plot2, ncol=2)


data = read.csv(paste0(base_url,"/MAMe_dataset.csv"))
head(data)


data_tbl = table(subset(data, select=c("Medium","Museum")))
res.ca = CA(data_tbl, graph=FALSE)


plot(res.ca)

Warning message:
“ggrepel: 10 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


indices = row.names(data_tbl) %in% c("Lithograph","Hand-colored engraving")
data_tbl[indices,]

                        Museum
Medium                   Los Angeles County Museum of Art
  Hand-colored engraving                              986
  Lithograph                                          306
                        Museum
Medium                   Metropolitan Museum of Art The Cleveland Museum of Art
  Hand-colored engraving                         47                          45
  Lithograph                                    572                         572


# Cherchons à regrouper les lignes similaires (résultat quasi inchangé, plus lisible)
proportions = t(apply(data_tbl, 1, function(row) row / sum(row)))
k = kmeans(proportions, 6) #6 trouvé par "essais-erreurs"


reduced_table = t(sapply(1:6, function(i) {
    inds = (1:nrow(data_tbl))[k$cluster == i]
    if (length(inds)>1) {colSums(data_tbl[inds,])} else {data_tbl[inds,]}
}))
row.names(reduced_table) = sapply(1:6, function(i) {
    inds = (1:nrow(data_tbl))[k$cluster == i]
    Reduce(function(x,y) paste(x, y, sep=";"), row.names(data_tbl)[inds])
})


rbind(reduced_table, colMeans(reduced_table)) #profil moyen en bas


res.ca = CA(reduced_table, graph=FALSE)
plot(res.ca)


data = read.csv(paste0(base_url,"/paintings.csv"))
head(data)


nrow(data) ; length(unique(data$painting))


length(unique(data$artist))


t1 = table(subset(data, select=c("S4","S23")))
res.ca = CA(t1, graph=FALSE)
plot(res.ca)

t1

   S23
S4   1  2  3  4  5
  1 11  4  3  4  0
  2  2  1  0  0  0
  3  0  1  1  0  1
  4  2  0  2  0  0
  5  1  4  1  0  1


t2 = table(subset(data, select=c("S12","art.movement")))
res.ca = CA(t2, graph=FALSE)
plot(res.ca)


t(t2)

                    S12
art.movement         1 2 3 4 5
  Abstract art       2 1 0 0 0
  Art Nouveau        1 0 0 0 2
  Baroque            1 2 0 0 0
  Cubism             2 1 0 0 0
  Impressionism      1 0 0 1 1
  Neoclassicism      1 1 0 1 0
  Op art             2 0 1 0 0
  Pop art            0 0 1 1 1
  Post-Impressionism 0 1 2 0 0
  Renaissance        1 2 0 0 0
  Romanticism        0 0 1 2 0
  Surrealism         1 1 0 1 0
  Symbolism          2 1 0 0 0


rowMeans(t2[,c("Art Nouveau","Impressionism")])
rowMeans(t2[,c("Pop art","Romanticism","Post-Impressionism")])
rowMeans(t2[,c("Abstract art","Baroque","Cubism","Nbeoclassicism","Op art","Renaissance","Surrealism","Symbolism")])

	Image.file	Medium	Museum	Museum.based.instance.ID	Subset	Width	Height	Product.size	Aspect.ratio
	<chr>	<chr>	<chr>	<chr>	<chr>	<dbl>	<dbl>	<dbl>	<dbl>
1	436018.jpg	Oil on canvas	Metropolitan Museum of Art	29.100.60	train	3144	3840	12072960	0.8187
2	11779.jpg	Oil on canvas	Metropolitan Museum of Art	1982.373	train	1707	2136	3646152	0.7992
3	19022.jpg	Oil on canvas	Metropolitan Museum of Art	2006.418	train	2845	3811	10842295	0.7465
4	435883.jpg	Oil on canvas	Metropolitan Museum of Art	61.101.3	train	3811	2916	11112876	1.3069
5	10481.jpg	Oil on canvas	Metropolitan Museum of Art	09.95	train	3811	2099	7999289	1.8156
6	13345.jpg	Oil on canvas	Metropolitan Museum of Art	84.12a	train	2655	3722	9881910	0.7133

	Los Angeles County Museum of Art	Metropolitan Museum of Art	The Cleveland Museum of Art
Graphite;Iron;Marble;Polychromed wood	233.0000	3170.000	682.000
Albumen photograph;Clay;Hand-colored etching;Pen and brown ink;Silk and metal thread;Woven fabric	39.0000	6998.000	141.000
Faience;Glass;Ivory;Limestone;Steel;Wood	510.0000	6794.000	701.000
Gold;Porcelain;Silver;Wood engraving;Woodcut	615.0000	4299.000	1997.000
Bronze;Ceramic;Engraving;Etching;Lithograph;Oil on canvas;Woodblock	3148.0000	3603.000	3399.000
Hand-colored engraving	986.0000	47.000	45.000
	921.8333	4151.833	1160.833

	S1	S2	S3	S4	S5	S6	S7	S8	S9	S10	⋯	S42	S43	S44	S45	S46	S47	S48	art.movement	artist	painting
	<int>	<int>	<int>	<int>	<int>	<int>	<int>	<int>	<int>	<int>	⋯	<int>	<int>	<int>	<int>	<int>	<int>	<int>	<chr>	<chr>	<chr>
1	3	2	3	1	1	1	2	2	3	1	⋯	2	1	2	2	4	2	2	Renaissance	Sandro Botticelli	The Birth of Venus
2	2	2	2	4	1	1	2	3	4	3	⋯	4	4	1	4	2	2	1	Renaissance	Leonardo da Vinci	Lady with an Ermine
3	1	3	3	1	1	1	5	2	1	2	⋯	1	1	1	4	3	1	3	Renaissance	Raphael	Three Graces
4	5	3	1	1	1	1	3	3	1	1	⋯	2	3	1	1	3	2	5	Baroque	Caravaggio	Entombment
5	2	2	2	1	1	1	1	2	1	1	⋯	1	1	2	1	4	2	4	Baroque	Diego Velazquez	Rokeby Venus
6	5	4	4	1	1	1	3	5	2	1	⋯	2	2	2	3	4	1	5	Baroque	Rembrandt van Rijn	The Night Watch

Exercice 0¶

Exercice 1¶

Exercice 2¶