## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
## 
## Rows: 1599 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## dbl (12): fixed acidity, volatile acidity, citric acid, residual sugar, chlo...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4898 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## dbl (12): fixed acidity, volatile acidity, citric acid, residual sugar, chlo...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Standardise every column (centre and scale) so the variables share a common
# scale, then draw one box per column.
red_wines_scaled <- scale(red_wines)
boxplot(red_wines_scaled)

# PCA on the red-wine predictors. `quality` is the response variable, so it is
# dropped before fitting; the remaining columns are standardised by prcomp().
red_wine_pca <- red_wines %>%
  select(-quality) %>%
  data.matrix() %>%
  prcomp(scale. = TRUE)

# Loadings: contribution of each original variable to each component.
red_wine_pca$rotation
##                              PC1          PC2         PC3          PC4
## fixed acidity         0.48931422  0.110502738 -0.12330157  0.229617370
## volatile acidity     -0.23858436 -0.274930480 -0.44996253 -0.078959783
## citric acid           0.46363166  0.151791356  0.23824707  0.079418256
## residual sugar        0.14610715 -0.272080238  0.10128338  0.372792562
## chlorides             0.21224658 -0.148051555 -0.09261383 -0.666194756
## free sulfur dioxide  -0.03615752 -0.513566812  0.42879287  0.043537818
## total sulfur dioxide  0.02357485 -0.569486959  0.32241450  0.034577115
## density               0.39535301 -0.233575490 -0.33887135  0.174499758
## pH                   -0.43851962 -0.006710793  0.05769735  0.003787746
## sulphates             0.24292133  0.037553916  0.27978615 -0.550872362
## alcohol              -0.11323206  0.386180959  0.47167322  0.122181088
##                              PC5         PC6         PC7         PC8
## fixed acidity        -0.08261366 -0.10147858  0.35022736 -0.17759545
## volatile acidity      0.21873452 -0.41144893  0.53373510 -0.07877531
## citric acid          -0.05857268 -0.06959338 -0.10549701 -0.37751558
## residual sugar        0.73214429 -0.04915555 -0.29066341  0.29984469
## chlorides             0.24650090 -0.30433857 -0.37041337 -0.35700936
## free sulfur dioxide  -0.15915198  0.01400021  0.11659611 -0.20478050
## total sulfur dioxide -0.22246456 -0.13630755  0.09366237  0.01903597
## density               0.15707671  0.39115230  0.17048116 -0.23922267
## pH                    0.26752977  0.52211645  0.02513762 -0.56139075
## sulphates             0.22596222  0.38126343  0.44746911  0.37460432
## alcohol               0.35068141 -0.36164504  0.32765090 -0.21762556
##                               PC9        PC10         PC11
## fixed acidity        -0.194020908  0.24952314 -0.639691452
## volatile acidity      0.129110301 -0.36592473 -0.002388597
## citric acid           0.381449669 -0.62167708  0.070910304
## residual sugar       -0.007522949 -0.09287208 -0.184029964
## chlorides            -0.111338666  0.21767112 -0.053065322
## free sulfur dioxide  -0.635405218 -0.24848326  0.051420865
## total sulfur dioxide  0.592115893  0.37075027 -0.068701598
## density              -0.020718675  0.23999012  0.567331898
## pH                    0.167745886  0.01096960 -0.340710903
## sulphates             0.058367062 -0.11232046 -0.069555381
## alcohol              -0.037603106  0.30301450  0.314525906
# Biplot of the red-wine PCA: observations drawn as small points together
# with the variable loadings.
fviz_pca_biplot(
  red_wine_pca,
  geom = "point",
  pointsize = 0.5
)

# Individuals-only plot of the same PCA, with label repelling enabled.
fviz_pca_ind(
  red_wine_pca,
  repel = TRUE
)

# Scree plot: proportion of total variance explained by each principal
# component of the red-wine PCA.
# Built with ggplot() because qplot() is deprecated since ggplot2 3.4.0; the
# component index comes from seq_along() so it always matches the number of
# components instead of a hard-coded 1:11.
variance_explained <- red_wine_pca$sdev^2 / sum(red_wine_pca$sdev^2)
scree_data <- data.frame(
  component = seq_along(variance_explained),
  variance_explained = variance_explained
)

ggplot(scree_data, aes(x = component, y = variance_explained)) +
  geom_point() +
  geom_line() +
  xlab("Principal Component") +
  ylab("Variance Explained") +
  ggtitle("Scree Plot")
## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Tag each data set with its wine type so the rows stay identifiable after
# the two tables are stacked.
red_wines_2 <- mutate(red_wines, Wine_Type = "red")
white_wines_2 <- mutate(white_wines, Wine_Type = "white")

# Stack red and white wines into a single table (red rows first).
wines <- rbind(red_wines_2, white_wines_2)

# PCA on the combined red + white wines.
# Both `quality` (the response) and `Wine_Type` (the grouping label) are
# excluded. `Wine_Type` is a character column, and data.matrix() would
# silently convert it to integer codes and feed it to the PCA as if it were a
# measured variable, leaking the very label the plots are coloured by into
# the components.
wine_pca <- wines %>%
  select(!c(quality, Wine_Type)) %>%
  data.matrix() %>%
  prcomp(scale. = TRUE)

# Individuals plot of the combined PCA, coloured by wine type.
fviz_pca_ind(
  wine_pca,
  habillage = wines$Wine_Type,
  repel = TRUE
)

# Biplot of the combined PCA: only variables are labelled, observations are
# coloured by wine type.
fviz_pca_biplot(
  wine_pca,
  label = "var",
  habillage = wines$Wine_Type
)