library(ggplot2)
# Toy example: 20 paired observations of age and spend, drawn as a
# scatter plot with age on the x axis and spend on the y axis.
df <- data.frame(
  age = c(
    18, 21, 22, 24, 26, 26, 27, 30, 31, 35,
    39, 40, 41, 42, 44, 46, 47, 48, 49, 54
  ),
  spend = c(
    10, 11, 22, 15, 12, 13, 14, 33, 39, 37,
    44, 27, 29, 20, 28, 21, 30, 31, 23, 24
  )
)
ggplot(data = df, mapping = aes(x = age, y = spend)) +
  geom_point()
# Clustering in R ----
library(tidyverse) # data manipulation
library(cluster) # clustering algorithms
library(factoextra) # Visualization
# Data preparation ----
# Open the raw data in the spreadsheet viewer only when someone is at the
# console; View() has nothing useful to show (and may fail) in a batch run.
if (interactive()) {
  View(USArrests)
}
# Work on a copy of the built-in data set, drop incomplete rows, then
# standardise every column so each variable contributes on the same scale.
df <- USArrests
df <- na.omit(df) # remove rows containing any missing value
df <- scale(df)   # centre and scale each column; the result is a matrix
head(df)
# Distance matrix ----
# Pairwise distances between the rows of df, computed by get_dist() with its
# default settings. The variable name and `<-` must sit on the same line:
# a line that starts with `<-` is a parse error in R.
distance <- get_dist(df)
# Heat map of the distance matrix using a three-colour gradient.
fviz_dist(
  distance,
  gradient = list(low = "#00AFBB", mid = "white", high = "#FC4E07")
)
# K-means with k = 2 ----
# Seed the RNG so the random starts (and therefore the cluster labels)
# are the same on every run.
set.seed(123)
# Use 25 random starts, matching the k = 3, 4, 5 fits this model is
# compared against in the plot grid.
k2 <- kmeans(df, centers = 2, nstart = 25)
str(k2)
# Plot the two clusters over the observations in df.
fviz_cluster(k2, data = df)
# Labelled plot of the k = 2 clusters ----
library(dplyr)
library(magrittr)
# as_tibble() discards the matrix row names, so the labels are re-attached
# explicitly. They are taken from df itself (not from USArrests) so they stay
# aligned with the rows that remain after na.omit().
df %>%
  as_tibble() %>%
  mutate(
    cluster = k2$cluster,
    state = rownames(df)
  ) %>%
  ggplot(aes(UrbanPop, Murder, color = factor(cluster), label = state)) +
  geom_text()
# K-means for k = 3, 4 and 5 ----
k3 <- kmeans(df, centers = 3, nstart = 25)
k4 <- kmeans(df, centers = 4, nstart = 25)
k5 <- kmeans(df, centers = 5, nstart = 25)

# Plots to compare: one panel per value of k, points only (no labels).
# Each title is a single-line string so all four panels are titled alike.
p1 <- fviz_cluster(k2, geom = "point", data = df) + ggtitle("k = 2")
p2 <- fviz_cluster(k3, geom = "point", data = df) + ggtitle("k = 3")
p3 <- fviz_cluster(k4, geom = "point", data = df) + ggtitle("k = 4")
p4 <- fviz_cluster(k5, geom = "point", data = df) + ggtitle("k = 5")

# Arrange the four panels in a 2 x 2 grid.
library(gridExtra)
grid.arrange(p1, p2, p3, p4, nrow = 2)
# Clustering optimization ----
# Gap statistic for k = 1..10 using 50 bootstrap reference sets.
set.seed(123)
gap_stat <- clusGap(df, FUN = kmeans, nstart = 25, K.max = 10, B = 50)
# Print the result
print(gap_stat, method = "firstmax")
fviz_gap_stat(gap_stat)

# Fit the final model. `final` must exist before it is inspected, so it is
# created here with the number of clusters picked from the gap statistic by
# the same "firstmax" rule used in the print() call above.
best_k <- maxSE(
  f = gap_stat$Tab[, "gap"],
  SE.f = gap_stat$Tab[, "SE.sim"],
  method = "firstmax"
)
final <- kmeans(df, centers = best_k, nstart = 25)
final$cluster
# Browse the members of cluster 1 when running interactively.
# drop = FALSE keeps a matrix even if the cluster has a single member.
if (interactive()) {
  View(df[final$cluster == 1, , drop = FALSE])
}
# Hierarchical Cluster Analysis ----
# Dissimilarity matrix
d <- dist(df, method = "euclidean")
# Hierarchical clustering using Complete Linkage
hc1 <- hclust(d, method = "complete")
# Plot the obtained dendrogram
plot(hc1, cex = 0.6, hang = -1)
# Comparing two dendrograms ----
# Compute distance matrix
res.dist <- dist(df, method = "euclidean")
# Compute 2 hierarchical clusterings on the same distances, differing only
# in the linkage method.
hc1 <- hclust(res.dist, method = "complete")
hc2 <- hclust(res.dist, method = "ward.D2")
# Create two dendrograms
dend1 <- as.dendrogram(hc1)
dend2 <- as.dendrogram(hc2)
# tanglegram() is provided by the dendextend package, which is not attached
# anywhere in the visible script, so it is called with an explicit namespace.
dendextend::tanglegram(dend1, dend2)
# End of script. (The blog footer text "No comments: / Post a Comment"
# that followed here was not R code.)