Commit 3e1ecb5c authored by Weigert, Andreas
Browse files

added some improvements

parent 7bce5627
...@@ -9,7 +9,7 @@ This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M) ...@@ -9,7 +9,7 @@ This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M)
```{r Load libraries} ```{r Load libraries}
library(FSelector) #for feature selection library(FSelector) #for feature selection / you need Java installed to load this package
library(party) #for classification algorithm decision trees library(party) #for classification algorithm decision trees
library(class) #for classification algorithm kNN library(class) #for classification algorithm kNN
library(e1071) #for classification algorithm SVM library(e1071) #for classification algorithm SVM
...@@ -46,34 +46,34 @@ calcFeatures.smd <- function(SMD){ ...@@ -46,34 +46,34 @@ calcFeatures.smd <- function(SMD){
# define some times # define some times
weekday <- 1:(5*4*24) weekday <- 1:(5*4*24)
weekend <- (5*4*24+1):672 weekend <-
night <- ( 1*4+1):( 6*4) night <-
morning <- ( 6*4+1):(10*4) morning <-
noon <- (10*4+1):(14*4) noon <-
afternoon <- (14*4+1):(18*4) afternoon <-
evening <- (18*4+1):(22*4) evening <-
#data.frame for the results #data.frame for the results
D=data.frame(c_week=mean(dm15, na.rm = T)) D=data.frame(c_week=mean(dm15, na.rm = T))
#calculate consumption features #calculate consumption features
D$c_night <- mean(dm15[night, 1:7], na.rm = T) D$c_night <- mean(dm15[night, 1:7], na.rm = T)
D$c_morning <- mean(dm15[morning, 1:7], na.rm = T) D$c_morning <- mean()
D$c_noon <- mean(dm15[noon, 1:7], na.rm = T) D$c_noon <- mean()
D$c_afternoon <- mean(dm15[afternoon, 1:7], na.rm = T) D$c_afternoon <- mean()
D$c_evening <- mean(dm15[evening, 1:7], na.rm = T) D$c_evening <- mean()
#calculate statistical features #calculate statistical features
D$s_we_max <- max(dm15[weekend], na.rm = T) D$s_we_max <- max()
D$s_we_min <- min(dm15[weekend], na.rm = T) D$s_we_min <- min()
D$s_wd_max <- max(dm15[weekday], na.rm = T) D$s_wd_max <- max()
D$s_wd_min <- min(dm15[weekday], na.rm = T) D$s_wd_min <- min()
#calculate relations #calculate relations
D$r_min_wd_we <- D$s_wd_min / D$s_we_min #division by 0 leads to NaN! D$r_min_wd_we <- D$s_wd_min / D$s_we_min #division by 0 leads to NaN!
D$r_min_wd_we <- ifelse(is.na(D$r_min_wd_we), 0, D$r_min_wd_we) D$r_min_wd_we <- ifelse(is.na(D$r_min_wd_we), 0, D$r_min_wd_we)
D$r_max_wd_we <- D$s_wd_max / D$s_we_max D$r_max_wd_we <-
D$r_max_wd_we <- ifelse(is.na(D$r_max_wd_we), 0, D$r_max_wd_we) D$r_max_wd_we <-
return(D) return(D)
} }
...@@ -109,7 +109,7 @@ for(i in 2:nrow(smd)){ ...@@ -109,7 +109,7 @@ for(i in 2:nrow(smd)){
```{r Classification Basic evaluation approach} ```{r Classification Basic evaluation approach}
## decisoon tree ## decision tree
#train the model #train the model
...@@ -127,7 +127,7 @@ for(i in 2:nrow(smd)){ ...@@ -127,7 +127,7 @@ for(i in 2:nrow(smd)){
## kNN ## kNN
# predict test cases from training data (lazy learning algoritm has no explicit training step!) # predict test cases from training data (lazy learning algorithm has no explicit training step!)
#create confusion matrix and calculate accuracy #create confusion matrix and calculate accuracy
......
...@@ -9,7 +9,7 @@ This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M) ...@@ -9,7 +9,7 @@ This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M)
```{r Load libraries} ```{r Load libraries}
library(FSelector) #for feature selection library(FSelector) #for feature selection / you need Java installed to load this package
library(party) #for classification algorithm decision trees library(party) #for classification algorithm decision trees
library(class) #for classification algorithm kNN library(class) #for classification algorithm kNN
library(e1071) #for classification algorithm SVM library(e1071) #for classification algorithm SVM
...@@ -144,7 +144,7 @@ alldata <- cbind(customers, features) ...@@ -144,7 +144,7 @@ alldata <- cbind(customers, features)
#simple call of the feature selection function #simple call of the feature selection function
cfs(pNumResidents ~ ., alldata) cfs(pNumResidents ~ ., alldata)
#Problem: other dependant variables are selected -> only use relevant variables in feature setection! #Problem: other dependent variables are selected -> only use relevant variables in feature selection!
#create a vector containing all feature names #create a vector containing all feature names
all.features <- setdiff(colnames(alldata), c("VID", "residents.numAdult", all.features <- setdiff(colnames(alldata), c("VID", "residents.numAdult",
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment