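fixed the issue. Will look into that further.

A metric that you can use to evaluate the result of a clustering is the silhouette coefficient. For a point whose intra-cluster cohesion (mean distance to the other points in its own cluster) is smaller than its inter-cluster separation (mean distance to the points of the nearest other cluster), this value basically computes: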

```
silhouette coefficient = 1 - (intra-cluster cohesion) / (inter-cluster separation)
```

```
# Silhouette plot for a k-means clustering of df into 2 clusters.
# silhouette() comes from the cluster package; df must already be defined.
plot(silhouette(kmeans(df, centers=2)$cluster, dist(df)))
```

```
# Silhouette plot for a k-means clustering of df into 3 clusters.
# silhouette() comes from the cluster package; df must already be defined.
plot(silhouette(kmeans(df, centers=3)$cluster, dist(df)))
```

```
# Silhouette plot for a k-means clustering of df into 4 clusters.
# silhouette() comes from the cluster package; df must already be defined.
plot(silhouette(kmeans(df, centers=4)$cluster, dist(df)))
```

```
library(cluster) # for silhouette
library(ggplot2) # for ggplot
library(scales) # for pretty_breaks

# Create sample 2-D data set with clusters around the points (1,1), (2,4), and (3,1).
# No seed is set here, so the sample (and the k-means random starts) differ on
# every run; call set.seed() before this script for reproducible output.
x <- c(
  rnorm(n = 25, mean = 1, sd = .1),
  rnorm(n = 25, mean = 2, sd = .1),
  rnorm(n = 25, mean = 3, sd = .2)
)
y <- c(
  rnorm(n = 25, mean = 1, sd = .1),
  rnorm(n = 25, mean = 4, sd = .1),
  rnorm(n = 25, mean = 1, sd = .2)
)
df <- data.frame(x = x, y = y)
xMax <- max(x)
yMax <- max(y)
# Use the same upper limit on both axes so the scatter plot is not distorted.
axisMax <- max(xMax, yMax)
print(ggplot(df, aes(x, y)) + geom_point() + xlim(0, axisMax) + ylim(0, axisMax))

# Use the Iris data set.
#df <- subset(iris, select=-c(Species))
#df <- scale(df)

# Run through multiple candidate values of K clusters.
kSequence <- seq(2, 5)
# The pairwise distances do not depend on k, so compute them once up front.
pointDistances <- dist(df)
xValues <- kSequence # Holds the kvalues (x-axis)
yValues <- numeric(length(kSequence)) # Holds the silhouette coefficient values (y-axis)
for (i in seq_along(kSequence)) {
  kValue <- kSequence[i]
  kmeansResult <- kmeans(df, centers = kValue, nstart = 5)
  silhouetteResult <- silhouette(kmeansResult$cluster, pointDistances)
  # Average silhouette width over all points for this k.
  silhouetteCoefficient <- mean(silhouetteResult[, "sil_width"])
  yValues[i] <- silhouetteCoefficient
}

# Pick the k with the highest average silhouette width. Average widths lie in
# [-1, 1], so comparing against an initial best of 0 would report k = 0
# whenever every candidate scores <= 0; which.max has no such blind spot and,
# like a strict ">" comparison, keeps the first k on ties.
bestIndex <- which.max(yValues)
bestKValue <- kSequence[bestIndex]
bestSilhouetteCoefficient <- yValues[bestIndex]

# Create a dataframe for ggplot to plot the accumulated silhouette values.
dfSilhouette <- data.frame(k = xValues, silhouetteCoefficient = yValues)

# Create the ggplot line plot for silhouette coefficient.
silhouettePlot <- ggplot(data = dfSilhouette, aes(k)) +
  geom_line(aes(y = silhouetteCoefficient)) +
  xlab("k") +
  ylab("Average silhouette width") +
  ggtitle("Average silhouette width") +
  scale_x_continuous(breaks = pretty_breaks(n = 20))
print(silhouettePlot)

# C-style formatted printing helper: format with sprintf, write with cat.
printf <- function(...) cat(sprintf(...))
printf("Best Silhouette coefficient=%f occurred at k=%d", bestSilhouetteCoefficient, bestKValue )
```