Commit dc34781b authored by Weigert, Andreas's avatar Weigert, Andreas
Browse files

added new content

parent 1849225d
......@@ -45,7 +45,23 @@ data <- customer %>% left_join(consumption, by="CustomerID") %>%
```
```{r Clustering 1 - A first try with extreme values}
# Assemble the clustering input: the customer ID plus six feature columns,
# keeping only the rows that have no missing values.
data_clustering <- data %>%
  select(
    CustomerID, NumDevices, LivingAreaM2, HouseholdMembers,
    n_logins, points, different_actions
  ) %>%
  na.omit()

# Plain k-means on the raw (unscaled) features.
k <- 3
set.seed(1) # fixed seed so the random starting centers are reproducible
# Drop the first column (CustomerID) so the identifier is not clustered on.
cluster1 <- kmeans(x = data_clustering[, -1], centers = k)

# Store each customer's cluster label next to its features.
data_clustering$kmeans_cluster1 <- cluster1$cluster

# Cluster sizes.
table(data_clustering$kmeans_cluster1)

# Observation: one cluster contains only a single customer. Why?
# Inspect the per-cluster descriptive statistics (ID column excluded).
data_clustering %>%
  select(-CustomerID) %>%
  describeBy(group = "kmeans_cluster1", skew = FALSE)

# The number of logins has an extreme value; plot it against the cluster
# label to see which cluster it ends up in.
plot(x = data_clustering$n_logins, y = data_clustering$kmeans_cluster1)
```
```{r Clustering 2 - A second try with untransformed data}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment