# Chad Williamson
# This script analyzes and clusters the individual seasons of all quarterbacks
# across every season in the NFL. Each season is clustered by the metrics of
# yardage, touchdowns, and interceptions.
#
# Input:  "nflPassingData.csv" in the working directory. The code below reads
#         the columns Name, Passing_Yards, TD_Passes and Ints; every column
#         other than Name is used as a clustering feature and must be numeric.
# Output: an elbow plot, a Name-by-cluster table, the k-means summary,
#         per-cluster means in original units, and three scatter plots.

# Load cluster package
library(cluster)
# Library used for visualization
library(factoextra)

# k-means uses random starting centers; fix the seed so cluster labels (and
# therefore plot colours and the printed table) are reproducible between runs.
set.seed(42)

# Load data set
nflData <- read.csv("nflPassingData.csv", header = TRUE)

# Remove any rows with missing values
nflData <- na.omit(nflData)

# Remove Name column for clustering
nflData.features <- nflData
nflData.features$Name <- NULL

# Standardize each feature to mean 0 / sd 1 before clustering. k-means uses
# Euclidean distance, so without this the feature with the largest numeric
# range (presumably Passing_Yards, relative to TD_Passes and Ints) would
# dominate the cluster assignment.
nflData.scaled <- scale(nflData.features)

# Determine optimal number of clusters (elbow method on within-cluster sum of
# squares). Printed explicitly so the plot is also drawn under source().
print(fviz_nbclust(nflData.scaled, kmeans, method = "wss"))

# Run k-means clustering with 5 clusters; nstart = 25 keeps the best of 25
# random initialisations.
clusters <- kmeans(nflData.scaled, centers = 5, nstart = 25)

# Create table for clusters generated. Useful for looking at individual seasons
print(table(nflData$Name, clusters$cluster))

# Show cluster info (centers are reported in standardized units)
print(clusters)

# Per-cluster feature means in the original units, for interpretation
print(aggregate(nflData.features,
                by = list(cluster = clusters$cluster),
                FUN = mean))

# Plot clusters side by side; remember the previous graphics settings so they
# can be restored afterwards.
old_par <- par(mfrow = c(1, 3))
plot(nflData[c("Passing_Yards", "TD_Passes")],
     col = clusters$cluster,
     main = "Passing Yards vs Touchdowns")
plot(nflData[c("Passing_Yards", "Ints")],
     col = clusters$cluster,
     main = "Passing Yards vs Interceptions")
plot(nflData[c("TD_Passes", "Ints")],
     col = clusters$cluster,
     main = "Touchdowns vs Interceptions")
par(old_par)