Commit c908fd69 authored by Hopf, Konstantin
Browse files

Changes made during L07

parent ed2e4e7f
...@@ -67,23 +67,29 @@ plotcl(Formatted_Readings, Cluster2$cluster, ylab="Consumption (kWh)", main="Max ...@@ -67,23 +67,29 @@ plotcl(Formatted_Readings, Cluster2$cluster, ylab="Consumption (kWh)", main="Max
#Transform the values to a more normally distributed form and run the k-means again #Transform the values to a more normally distributed form and run the k-means again
Datas <- sqrt(Formatted_Readings) Datas <- sqrt(Formatted_Readings)
Datasn <- Datas/rowMeans(Datas) Datasn <- Datas/rowMeans(Datas)
set.seed(4) Datasn <- Datasn[-297] #remove outlier
Cluster3 <- kmeans(Datasn, centers=3) set.seed(7)
Cluster3 <- kmeans(Datasn,
centers=3)
plotcl(Formatted_Readings, Cluster3$cluster, ylab="Consumption (kWh)", main="Sqrt and Max-normaized consumption", lim=c(0,1.2)) plotcl(Formatted_Readings, Cluster3$cluster, ylab="Consumption (kWh)", main="Sqrt and Max-normaized consumption", lim=c(0,1.2))
#some details about the model
table(Cluster3$cluster) # size of the clusters 1
Cluster3$size # size of the clusters 2
Cluster3$withinss # variance in the clusters
Cluster3$betweenss # variance between the clusters
``` ```
```{r obtain the optimal number of clusters} ```{r obtain the optimal number of clusters}
set.seed(2)
Clusters <- list() Clusters <- list()
Clusters[[1]] <- kmeans(Datasn, centers=2) for(i in 1:8){
Clusters[[2]] <- kmeans(Datasn, centers=3) set.seed(7)
Clusters[[3]] <- kmeans(Datasn, centers=4) Clusters[[i]] <- kmeans(Datasn, centers=i+1)
Clusters[[4]] <- kmeans(Datasn, centers=5) }
Clusters[[5]] <- kmeans(Datasn, centers=6)
Clusters[[6]] <- kmeans(Datasn, centers=7) Clusters[[2]]$size
Clusters[[7]] <- kmeans(Datasn, centers=8)
Clusters[[8]] <- kmeans(Datasn, centers=9)
#the total sum of squares #the total sum of squares
tot.withinss <- sapply(Clusters, function(v){return(v$tot.withinss)}) tot.withinss <- sapply(Clusters, function(v){return(v$tot.withinss)})
...@@ -112,7 +118,7 @@ C <- 1-cor(t(Formatted_Readings)) ...@@ -112,7 +118,7 @@ C <- 1-cor(t(Formatted_Readings))
Dendrogram <- agnes(C,diss=T,method="complete") Dendrogram <- agnes(C,diss=T,method="complete")
plot(Dendrogram, which.plot=2) #plot the dendrogram plot(Dendrogram, which.plot=2) #plot the dendrogram
Cluster4 <- cutree(Dendrogram, k=3) Cluster4 <- cutree(Dendrogram, k=4)
plotcl(Formatted_Readings, Cluster4, main="Hierarchical clustering results", ylab="Consumption (kWh)", lim=c(0,1.3)) plotcl(Formatted_Readings, Cluster4, main="Hierarchical clustering results", ylab="Consumption (kWh)", lim=c(0,1.3))
``` ```
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment