Commit b204afbd authored by Andreas Weigert

added solution for Tutorial 10, Task 3

parent b270aa49
@@ -107,6 +107,7 @@ plot(performance(pred, "tpr", "fpr"))
```
```{r Classification with probabilities - multiclass}
testdata <- na.omit(all_data[test.cases, c("pNumResidents", selected.features)])
traindata <- na.omit(all_data[-test.cases, c("pNumResidents", selected.features)])
@@ -132,56 +133,3 @@ performance(pred, "auc")
#the ROC curve shows true-positive-rate vs. false-positive rate
plot(performance(pred, "tpr", "fpr"))
```
```{r Classification with an advanced evaluation technique: cross-validation}
set.seed(1506)
all_data$crossfolds <- sample(1:5, nrow(all_data), replace = TRUE)
# list for the interim results
results <- list()
for(foldIndex in 1:5){
# create train and test data for the current fold
testdata <- na.omit(all_data[all_data$crossfolds==foldIndex, c("pNumResidents2", selected.features)])
traindata <- na.omit(all_data[all_data$crossfolds!=foldIndex, c("pNumResidents2", selected.features)])
model <- svm(pNumResidents2 ~ . , data=traindata, probability = T)
results[[foldIndex]] <- list()
results[[foldIndex]]$model <- model
clres <- predict(model, newdata=testdata, probability = T)
results[[foldIndex]]$probs <- attributes(clres)$probabilities
cm <- table(clres, testdata$pNumResidents2)
results[[foldIndex]]$cm <- cm
results[[foldIndex]]$accuracy <- sum(diag(cm)) / sum(cm)
results[[foldIndex]]$labels <- testdata$pNumResidents2
}
# FROM HERE ON EXERCISE
# calculate the average accuracy
accuracy_array <- simplify2array(lapply(X = results, FUN = function(i) i$accuracy))
accuracy_mean <- mean(accuracy_array)
accuracy_mean
# plot the ROC curves of the individual folds and calculate the average AUC; the target class is ">2 persons"
probabilities_list <- lapply(X = results, FUN = function(i) i$probs[,">2 persons"])
label_list <- lapply(X = results, FUN = function(i) i$labels)
pred <- prediction(predictions = probabilities_list, labels = label_list)
auc_array <- unlist(performance(pred, "auc")@y.values)
auc_mean <- mean(auc_array)
auc_mean
#the ROC curve shows true-positive-rate vs. false-positive rate
plot(performance(pred, "tpr", "fpr"))
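# optionally (not part of the original solution), ROCR's plot method can also
# combine the per-fold curves: avg = "vertical" draws a single vertically
# averaged ROC curve across the five folds
plot(performance(pred, "tpr", "fpr"), avg = "vertical")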
```
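The chunks above call `svm()` from the e1071 package and `prediction()`/`performance()` from the ROCR package; the corresponding setup chunk is presumably defined earlier in the Rmd file and is not part of this diff. A minimal sketch of the assumed setup:

```{r Assumed package setup, eval=FALSE}
# not part of this commit: packages the chunks above depend on
library(e1071) # svm()
library(ROCR)  # prediction(), performance()
```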