Commit 6a6abb88 authored by Weigert, Andreas's avatar Weigert, Andreas
Browse files

Added R Introduction 4 with plots and across (new)

parent e8d7dd1b
---
title: 'Tutorial 13: Optimization'
output: html_notebook
editor_options:
  chunk_output_type: inline
---
This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M), Information Systems and Energy Efficient Systems, University of Bamberg.
```{r}
library(lpSolve)
library(ggplot2)
```
```{r lpSolve default example}
# Linear program solved with lp() from the lpSolve package:
#   maximize     x1 + 9 x2 +   x3
#   subject to   x1 + 2 x2 + 3 x3 <= 9
#              3 x1 + 2 x2 + 2 x3 <= 15
objective.in <- c(1, 9, 1)            # objective coefficients for x1, x2, x3
const.mat <- matrix(
  c(1, 2, 3,
    3, 2, 2),
  nrow = 2, byrow = TRUE              # one row per constraint
)
f.dir <- c("<=", "<=")                # direction of each constraint
const.rhs <- c(9, 15)                 # right-hand side of each constraint
direction <- "max"

# Solve once and reuse the result instead of running the solver twice.
lp_result <- lp(direction, objective.in, const.mat, f.dir, const.rhs)
lp_result            # prints the solver result
lp_result$solution   # values of the decision variables
```
---
title: 'Tutorial 4: R Introduction 4'
output: html_notebook
editor_options:
  chunk_output_type: inline
---
This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M), Information Systems and Energy Efficient Systems, University of Bamberg.
```{r Using dplyr: summarise}
# Mean weight over all rows of `fruits` (`fruits` is created outside this chunk).
summarise(fruits, mean(weight))

# Group by colour so that the following summaries are computed per colour.
fruits_grouped_color <- group_by(fruits, color)
summarise(fruits_grouped_color, avg_weight = mean(weight), num_fruits = n())

# Task 8: several summary statistics of weight per colour
summarise(
  fruits_grouped_color,
  avg_weight = mean(weight),
  num_fruits = n(),
  sd_weight = sd(weight),
  var_weight = var(weight),
  min_weight = min(weight),
  max_weight = max(weight)
)

# Task 9: first, last and number of distinct fruits per colour
summarise(
  fruits_grouped_color,
  first_fruit = first(fruit),
  last_fruit = last(fruit),
  n_distinct_fruits = n_distinct(fruit)
)

# Task 10: open the help page of n_distinct()
?n_distinct
```
```{r Using dplyr: the piping operator}
# Average weight and number of fruits per colour, sorted by average weight.
fruits %>%
  group_by(color) %>%
  summarise(avg_weight = mean(weight), num_fruits = n()) %>%
  arrange(avg_weight)

# Task 11: keep fruits whose weight lies between 8 and 100 (inclusive)
fruits %>%
  filter(between(weight, 8, 100))

# Task 12: fruits ordered by descending fruits_per_pound
# (`fruits_weight` is created outside this chunk)
fruits_weight %>%
  arrange(desc(fruits_per_pound)) %>%
  select(fruit, fruits_per_pound)

# Task 13: the Task 8 summary statistics, written as a pipeline
fruits %>%
  group_by(color) %>%
  summarise(
    avg_weight = mean(weight),
    num_fruits = n(),
    sd_weight = sd(weight),
    var_weight = var(weight),
    min_weight = min(weight),
    max_weight = max(weight)
  )
```
```{r Using dplyr: Joins}
# Lookup table: recipe 1 has four ingredients, recipe 2 has three.
recipe <- data.frame(
  recipeNo = rep(c(1, 2), times = c(4, 3)),
  fruit = c(
    "banana", "lemon", "raspberry", "shugar",
    "cantaloupe", "plum", "lemon"
  )
)

# Full join keeps the rows of both tables.
full_join(fruits, recipe)
# Left join keeps every row of `fruits` and adds matching recipe rows.
left_join(fruits, recipe)
```
```{r Using dplyr: Across}
# Example data: one grouping column and four numeric columns.
# Six values are drawn per column; with runif(3, ...) data.frame() would
# silently recycle the three values, giving both groups identical numbers.
random_numbers <- data.frame(
  group = c("a", "a", "a", "b", "b", "b"),
  a_1 = runif(6, 0, 1),
  a_2 = runif(6, 0, 1),
  a_3 = runif(6, 0, 1),
  b_1 = runif(6, 0, 1)
)

# Goal: Want to select only columns starting with "a_"
random_numbers %>%
  select(a_1, a_2, a_3)
# better: starts_with() matches the prefix only, whereas contains() would
# also pick up columns that merely have "a_" somewhere in their name
random_numbers %>%
  select(starts_with("a_"))

# Goal: Want to round all numerics
random_numbers %>%
  mutate(
    a_1 = round(a_1, 2),
    a_2 = round(a_2, 2),
    a_3 = round(a_3, 2),
    b_1 = round(b_1, 2)
  )
# better: predicate functions must be wrapped in where() inside across();
# passing a bare predicate is deprecated
random_numbers %>%
  mutate(across(where(is.numeric), ~ round(., 2)))

# Goal: Want to calculate grouped mean for all numerics
random_numbers %>%
  group_by(group) %>%
  summarise(
    a_1 = mean(a_1),
    a_2 = mean(a_2),
    a_3 = mean(a_3),
    b_1 = mean(b_1)
  )
# better:
random_numbers %>%
  group_by(group) %>%
  summarise(across(where(is.numeric), ~ mean(.)))
```
```{r Illustrating analytical results by plotting techniques}
# Load and prepare data
Shower <- read.csv2("../../data/Shower_data.csv")
Shower$group <- as.factor(Shower$group)
# NOTE(review): "Fourth group" is listed before "Third group"; confirm that
# this order matches the coding of `group` in the data file.
levels(Shower$group) <- c("First group", "Second group", "Fourth group",
                          "Third group", "Fifth group", "Sixth group")
Shower$AvgTemperature <- as.numeric(Shower$AvgTemperature)

# Scatterplot
plot(x = Shower$AvgTemperature, y = Shower$Volume)

# Scatterplot with formula
plot(Shower$Volume ~ Shower$AvgTemperature)

# Barplot mean_volume per group
Shower_groups <- Shower %>%
  group_by(group) %>%
  summarize(mean_volume = mean(Volume, na.rm = TRUE))
barplot(Shower_groups$mean_volume, names.arg = Shower_groups$group)

# pie chart
pie(Shower_groups$mean_volume)

# boxplot
selected_showers <- Shower[Shower$Hh_ID %in% c(7890, 4624), ]
boxplot(selected_showers$Volume ~ selected_showers$Hh_ID)

# line plot: one line per selected household
showers_7890 <- selected_showers[selected_showers$Hh_ID == 7890, ]
showers_4624 <- selected_showers[selected_showers$Hh_ID == 4624, ]
# Axis limits span both households so the second line is not clipped.
plot(Volume ~ Shower, data = showers_7890, type = "l", col = "blue",
     xlim = range(selected_showers$Shower, na.rm = TRUE),
     ylim = range(selected_showers$Volume, na.rm = TRUE))
lines(Volume ~ Shower, data = showers_4624, col = "red")
# Both lines are drawn solid, so the legend uses a single line type.
legend(60, 120, legend = c("Hh_ID = 7890", "Hh_ID = 4624"),
       col = c("blue", "red"), lty = 1, cex = 0.8)

# histogram
hist(Shower$Volume, breaks = 20)

# histogram and density line
hist(Shower$Volume, probability = TRUE, breaks = 20)
lines(density(na.omit(Shower$Volume)), col = "blue", lwd = 2)

# QQPlot
qqnorm(Shower$Volume)

# logarithmized QQplot
qqnorm(log(Shower$Volume))
```
---
title: 'Tutorial 4: R Introduction 4'
output: html_notebook
editor_options:
  chunk_output_type: inline
---
This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M), Information Systems and Energy Efficient Systems, University of Bamberg.
```{r Using dplyr: summarise}
# Mean weight over all rows of `fruits` (`fruits` is created outside this chunk).
summarise(
  fruits,
  mean(weight)
)

# Group by colour, then summarise each group.
fruits_grouped_color <- group_by(fruits, color)
summarise(
  fruits_grouped_color,
  avg_weight = mean(weight),
  num_fruits = n()
)

# Task 8

# Task 9

# Task 10
```
```{r Using dplyr: the piping operator}
# The grouped summary expressed as a pipeline: each step receives the
# result of the previous one as its first argument.
fruits %>%
  group_by(color) %>%
  summarise(
    avg_weight = mean(weight),
    num_fruits = n()
  ) %>%
  arrange(avg_weight)

# Task 11

# Task 12

# Task 13
```
```{r Using dplyr: Joins}
# Lookup table: recipe 1 has four ingredients, recipe 2 has three.
recipe <- data.frame(
  recipeNo = rep(c(1, 2), times = c(4, 3)),
  fruit = c(
    "banana", "lemon", "raspberry", "shugar",
    "cantaloupe", "plum", "lemon"
  )
)

# Full join keeps the rows of both tables.
full_join(fruits, recipe)
# Left join keeps every row of `fruits` and adds matching recipe rows.
left_join(fruits, recipe)
```
```{r Using dplyr: Across}
# Example data: one grouping column and four numeric columns.
# Six values are drawn per column; with runif(3, ...) data.frame() would
# silently recycle the three values, giving both groups identical numbers.
random_numbers <- data.frame(
  group = c("a", "a", "a", "b", "b", "b"),
  a_1 = runif(6, 0, 1),
  a_2 = runif(6, 0, 1),
  a_3 = runif(6, 0, 1),
  b_1 = runif(6, 0, 1)
)

# Goal: Want to select only columns starting with "a_"
random_numbers %>%
  select(a_1, a_2, a_3)
# better: starts_with() matches the prefix only, whereas contains() would
# also pick up columns that merely have "a_" somewhere in their name
random_numbers %>%
  select(starts_with("a_"))

# Goal: Want to round all numerics
random_numbers %>%
  mutate(
    a_1 = round(a_1, 2),
    a_2 = round(a_2, 2),
    a_3 = round(a_3, 2),
    b_1 = round(b_1, 2)
  )
# better: predicate functions must be wrapped in where() inside across();
# passing a bare predicate is deprecated
random_numbers %>%
  mutate(across(where(is.numeric), ~ round(., 2)))

# Goal: Want to calculate grouped mean for all numerics
random_numbers %>%
  group_by(group) %>%
  summarise(
    a_1 = mean(a_1),
    a_2 = mean(a_2),
    a_3 = mean(a_3),
    b_1 = mean(b_1)
  )
# better:
random_numbers %>%
  group_by(group) %>%
  summarise(across(where(is.numeric), ~ mean(.)))
```
```{r Illustrating analytical results by plotting techniques}
# Load and prepare data
# NOTE(review): the plotting calls below are left without arguments,
# presumably as exercise placeholders; the chunk will not run until they
# are filled in (in particular `plot( ~ )` is not a complete expression).
Shower <- read.csv2("../../data/Shower_data.csv")
Shower$group <- as.factor(Shower$group)
# Replace the factor levels of `group` with descriptive labels.
# NOTE(review): "Fourth group" is listed before "Third group"; confirm that
# this order matches the coding of `group` in the data file.
levels(Shower$group) <- c("First group", "Second group", "Fourth group",
"Third group", "Fifth group", "Sixth group")
Shower$AvgTemperature <- as.numeric(Shower$AvgTemperature)
# Scatterplot
plot()
# Scatterplot with formula
plot( ~ )
# Barplot mean_volume per group
# Mean of Volume per group, ignoring missing values.
Shower_groups <- Shower %>% group_by(group) %>% summarize(mean_volume = mean(Volume, na.rm = T))
barplot()
# pie chart
pie()
# boxplot
# Keep only the rows of the two households 7890 and 4624.
selected_showers <- Shower[Shower$Hh_ID %in% c(7890, 4624),]
boxplot()
# line plot
plot()
lines()
# histogram
hist()
# histogram and density line
hist()
lines()
# QQPlot
qqnorm()
# logarithmized QQplot
qqnorm()
```
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment