Commit c908fd69 authored by Hopf, Konstantin's avatar Hopf, Konstantin
Browse files

Changes made during L07

parent ed2e4e7f
......@@ -67,23 +67,29 @@ plotcl(Formatted_Readings, Cluster2$cluster, ylab="Consumption (kWh)", main="Max
#Transform the values to a more normally distributed form and run the k-means again
Datas <- sqrt(Formatted_Readings)
Datasn <- Datas/rowMeans(Datas)
Cluster3 <- kmeans(Datasn, centers=3)
Datasn <- Datasn[-297] #remove outlier
Cluster3 <- kmeans(Datasn,
plotcl(Formatted_Readings, Cluster3$cluster, ylab="Consumption (kWh)", main="Sqrt and Max-normaized consumption", lim=c(0,1.2))
#some details about the model
table(Cluster3$cluster) # size of the clusters 1
Cluster3$size # size of the clusters 2
Cluster3$withinss # variance in the clusters
Cluster3$betweenss # variance between the clusters
```{r obtain the optimal number of clusters}
Clusters <- list()
Clusters[[1]] <- kmeans(Datasn, centers=2)
Clusters[[2]] <- kmeans(Datasn, centers=3)
Clusters[[3]] <- kmeans(Datasn, centers=4)
Clusters[[4]] <- kmeans(Datasn, centers=5)
Clusters[[5]] <- kmeans(Datasn, centers=6)
Clusters[[6]] <- kmeans(Datasn, centers=7)
Clusters[[7]] <- kmeans(Datasn, centers=8)
Clusters[[8]] <- kmeans(Datasn, centers=9)
for(i in 1:8){
Clusters[[i]] <- kmeans(Datasn, centers=i+1)
#the total within-cluster sum of squares for each number of clusters
tot.withinss <- sapply(Clusters, function(v){return(v$tot.withinss)})
......@@ -112,7 +118,7 @@ C <- 1-cor(t(Formatted_Readings))
Dendrogram <- agnes(C,diss=T,method="complete")
plot(Dendrogram, which.plot=2) #plot the dendrogram
Cluster4 <- cutree(Dendrogram, k=3)
Cluster4 <- cutree(Dendrogram, k=4)
plotcl(Formatted_Readings, Cluster4, main="Hierarchical clustering results", ylab="Consumption (kWh)", lim=c(0,1.3))
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment