This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M), Information Systems and Energy Efficient Systems, University of Bamberg.
```{r Load libraries}
library(FSelector) # feature selection; loading this package requires Java
library(party) # classification algorithm: decision trees
library(class) # classification algorithm: k-nearest neighbours (kNN)
library(e1071) # classification algorithm: support vector machine (SVM)
library(randomForest) # additional classification algorithm: random forest
```
```{r Load and prepare data}
# Load data
# Derive and investigate the dependent variable "number of residents"
```
```{r Detailed analysis of the independent variables}
# Descriptive analysis of load traces -------------------------------------
# Plot some load curves from households to get familiar with the data
# Household chosen for inspection. Presumably an index or ID into the load
# data; the plotting code that uses it is not in this chunk -- TODO confirm.
household <- 8
```
```{r Feature extraction}
# Define and implement 10 features from SMD (e.g. mean consumption, mean
# consumption in the evening)
calcFeatures.smd <- function(SMD){
#SMD: the load trace for one week (vector with 672 elements)
#create a matrix with 7 columns for each day
dm15=matrix(as.numeric(SMD),ncol=7)
# define some times
weekday <- 1:(5*4*24)
weekend <-
night <-
morning <-
noon <-
afternoon <-
evening <-
#data.frame for the results
D=data.frame(c_week=mean(dm15, na.rm = T))
#calculate consumption features
D$c_night <- mean(dm15[night, 1:7], na.rm = T)
D$c_morning <- mean()
D$c_noon <- mean()
D$c_afternoon <- mean()
D$c_evening <- mean()
#calculate statistical features
D$s_we_max <- max()
D$s_we_min <- min()
D$s_wd_max <- max()
D$s_wd_min <- min()
#calculate relations
D$r_min_wd_we <- D$s_wd_min / D$s_we_min #division by 0 leads to NaN!