Commit 3e1ecb5c authored by Weigert, Andreas's avatar Weigert, Andreas
Browse files

added some improvements

parent 7bce5627
......@@ -9,7 +9,7 @@ This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M)
```{r Load libraries}
library(FSelector) #for feature selection
library(FSelector) #for feature selection / you need Java installed to load this package
library(party) #for classification algorithm decision trees
library(class) #for classification algorithm kNN
library(e1071) #for classification algorithm SVM
......@@ -46,34 +46,34 @@ calcFeatures.smd <- function(SMD){
# Body of calcFeatures.smd: derives consumption, statistical and relational
# features from one week of 15-minute smart-meter data.
# Assumes `dm15` is a 672 x 7 matrix (96 quarter-hours/day x 7 days) of
# 15-min consumption readings — TODO confirm against the reshaping code above.

# define index ranges for the relevant time windows
weekday   <- 1:(5*4*24)          # Mon-Fri: first 5 days x 96 readings
weekend   <- (5*4*24+1):672      # Sat-Sun: remaining readings of the week
night     <- ( 1*4+1):( 6*4)     # 01:00-06:00 (quarter-hour slots per day)
morning   <- ( 6*4+1):(10*4)     # 06:00-10:00
noon      <- (10*4+1):(14*4)     # 10:00-14:00
afternoon <- (14*4+1):(18*4)     # 14:00-18:00
evening   <- (18*4+1):(22*4)     # 18:00-22:00

# data.frame for the results; c_week = overall mean consumption
D <- data.frame(c_week = mean(dm15, na.rm = TRUE))

# consumption features: mean consumption per time-of-day window
# (columns 1:7 select all seven days of the week)
D$c_night     <- mean(dm15[night, 1:7], na.rm = TRUE)
D$c_morning   <- mean(dm15[morning, 1:7], na.rm = TRUE)
D$c_noon      <- mean(dm15[noon, 1:7], na.rm = TRUE)
D$c_afternoon <- mean(dm15[afternoon, 1:7], na.rm = TRUE)
D$c_evening   <- mean(dm15[evening, 1:7], na.rm = TRUE)

# statistical features: extremes on weekend vs. weekday readings
D$s_we_max <- max(dm15[weekend], na.rm = TRUE)
D$s_we_min <- min(dm15[weekend], na.rm = TRUE)
D$s_wd_max <- max(dm15[weekday], na.rm = TRUE)
D$s_wd_min <- min(dm15[weekday], na.rm = TRUE)

# relational features: weekday/weekend ratios.
# Division by 0 yields NaN (0/0) or Inf — map missing ratios to 0.
D$r_min_wd_we <- D$s_wd_min / D$s_we_min
D$r_min_wd_we <- ifelse(is.na(D$r_min_wd_we), 0, D$r_min_wd_we)
D$r_max_wd_we <- D$s_wd_max / D$s_we_max
D$r_max_wd_we <- ifelse(is.na(D$r_max_wd_we), 0, D$r_max_wd_we)

return(D)
}
......@@ -109,7 +109,7 @@ for(i in 2:nrow(smd)){
```{r Classification Basic evaluation approach}
## decisoon tree
## decision tree
#train the model
......@@ -127,7 +127,7 @@ for(i in 2:nrow(smd)){
## kNN
# predict test cases from training data (lazy learning algoritm has no explicit training step!)
# predict test cases from training data (lazy learning algorithm has no explicit training step!)
#create confusion matrix and calculate accuracy
......
......@@ -9,7 +9,7 @@ This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M)
```{r Load libraries}
library(FSelector) #for feature selection
library(FSelector) #for feature selection / you need Java installed to load this package
library(party) #for classification algorithm decision trees
library(class) #for classification algorithm kNN
library(e1071) #for classification algorithm SVM
......@@ -144,7 +144,7 @@ alldata <- cbind(customers, features)
#simple call of the feature selection function
cfs(pNumResidents ~ ., alldata)
#Problem: other dependant variables are selected -> only use relevant variables in feature setection!
#Problem: other dependent variables are selected -> only use relevant variables in feature selection!
#create a vector containing all feature names
all.features <- setdiff(colnames(alldata), c("VID", "residents.numAdult",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment