Commit 1849225d authored by Weigert, Andreas
Browse files

added first part

parent 8745fbd1
......@@ -18,18 +18,30 @@ library(readr)
```{r Read and prepare data}
# Read the five raw tables, harmonise the join key, derive per-customer
# features and combine everything into one table (`data`), one customer per
# row as long as the joined tables are unique per CustomerID (see below).

# read data ----
# The string "NULL" in the CSV files is parsed as a missing value (NA).
consumption <- read_csv(
  file = "../data/clustering/bam_energy_report_consumption.csv",
  na = "NULL"
)
customer <- read_csv(
  file = "../data/clustering/bam_energy_report_customers.csv",
  na = "NULL"
)
logins <- read_csv(
  file = "../data/clustering/bam_energy_report_logins.csv",
  na = "NULL"
)
survey <- read_csv(
  file = "../data/clustering/bam_energy_report_survey.csv",
  na = "NULL"
)
portal_points <- read_csv(
  file = "../data/clustering/bam_energy_report_portal_points.csv",
  na = "NULL"
)

# convert data ----
# Store CustomerID as character in every table so the join keys below all
# have the same type.
consumption$CustomerID <- as.character(consumption$CustomerID)
customer$CustomerID <- as.character(customer$CustomerID)
logins$CustomerID <- as.character(logins$CustomerID)
survey$CustomerID <- as.character(survey$CustomerID)
portal_points$CustomerID <- as.character(portal_points$CustomerID)

# aggregate and calculate data ----
# Number of non-missing survey fields per row, ignoring the first column.
# NOTE(review): assumes the first column of `survey` is CustomerID -- confirm.
survey$answers <- rowSums(!is.na(survey[, -1]))

# Per customer: total of Points and number of distinct TaskCode values.
portal_points_agg <- portal_points %>%
  group_by(CustomerID) %>%
  summarize(
    points = sum(Points),
    different_actions = n_distinct(TaskCode)
  )

# Per customer: number of rows in the logins table.
logins_agg <- logins %>%
  group_by(CustomerID) %>%
  summarize(n_logins = n())

# 2012 consumption divided by the 2012 billing days.
# NOTE(review): a billing_days_2012 of 0 yields Inf/NaN here -- confirm that
# such rows cannot occur or are filtered later.
consumption$consumption_normalized <-
  consumption$consumption_2012 / consumption$billing_days_2012

# Join data together ----
# Left joins keep every row of `customer`; customers without a match in one
# of the other tables get NA in that table's columns.
# NOTE(review): `consumption` and `survey` are joined un-aggregated; if either
# holds several rows per CustomerID the customer rows are duplicated -- confirm
# that CustomerID is unique in both.
data <- customer %>%
  left_join(consumption, by = "CustomerID") %>%
  left_join(logins_agg, by = "CustomerID") %>%
  left_join(survey, by = "CustomerID") %>%
  left_join(portal_points_agg, by = "CustomerID")
```
```{r Clustering 1 - A first try with extreme values}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment