Commit 6a6abb88 by Weigert, Andreas

### Added R Introduction 4 with plots and across (new)

parent e8d7dd1b
 ---
 title: 'Tutorial 13: Optimization'
 output: html_notebook
 editor_options:
   chunk_output_type: inline
 ---

 This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M), Information Systems and Energy Efficient Systems, University of Bamberg.

 ```{r}
 library(lpSolve)
 library(ggplot2)
 ```

 ```{r lpSolve default example}
 # Linear program solved with lpSolve::lp():
 #
 #   maximize      x1 + 9 x2 +   x3
 #   subject to    x1 + 2 x2 + 3 x3 <= 9
 #               3 x1 + 2 x2 + 2 x3 <= 15
 #
 # Coefficients of the objective function (one entry per decision variable).
 objective.in <- c(1, 9, 1)

 # Constraint coefficients: one row per constraint, one column per variable.
 const.mat <- matrix(
   c(1, 2, 3,
     3, 2, 2),
   nrow = 2,
   byrow = TRUE
 )

 # Direction and right-hand side of each constraint, in row order of const.mat.
 f.dir <- c("<=", "<=")
 const.rhs <- c(9, 15)

 direction <- "max"

 # Solve once and reuse the result instead of solving the same problem twice.
 lp_result <- lp(direction, objective.in, const.mat, f.dir, const.rhs)

 # Print the solver summary, then the values of the decision variables.
 lp_result
 lp_result$solution
 ```
 ---
 title: 'Tutorial 4: R Introduction 4'
 output: html_notebook
 editor_options:
   chunk_output_type: inline
 ---

 This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M), Information Systems and Energy Efficient Systems, University of Bamberg.

 ```{r Using dplyr: summarise}
 # NOTE(review): this notebook uses dplyr verbs and the data frames `fruits`
 # and `fruits_weight` without creating them here - presumably they come from
 # the previous tutorial's session; confirm before running stand-alone.

 # Overall mean weight
 summarise(fruits, mean(weight))

 # Mean weight and number of fruits per color
 fruits_grouped_color <- group_by(fruits, color)
 summarise(fruits_grouped_color, avg_weight = mean(weight), num_fruits = n())

 # Task 8
 summarise(
   fruits_grouped_color,
   avg_weight = mean(weight),
   num_fruits = n(),
   sd_weight = sd(weight),
   var_weight = var(weight),
   min_weight = min(weight),
   max_weight = max(weight)
 )

 # Task 9
 summarize(
   fruits_grouped_color,
   first_fruit = first(fruit),
   last_fruit = last(fruit),
   n_distinct_fruits = n_distinct(fruit)
 )

 # Task 10
 ?n_distinct
 ```

 ```{r Using dplyr: the piping operator}
 fruits %>%
   group_by(color) %>%
   summarise(avg_weight = mean(weight), num_fruits = n()) %>%
   arrange(avg_weight)

 # Task 11
 fruits %>%
   filter(between(weight, 8, 100))

 # Task 12
 fruits_weight %>%
   arrange(desc(fruits_per_pound)) %>%
   select(fruit, fruits_per_pound)

 # Task 13
 fruits %>%
   group_by(color) %>%
   summarise(
     avg_weight = mean(weight),
     num_fruits = n(),
     sd_weight = sd(weight),
     var_weight = var(weight),
     min_weight = min(weight),
     max_weight = max(weight)
   )
 ```

 ```{r Using dplyr: Joins}
 recipe <- data.frame(
   recipeNo = c(1, 1, 1, 1, 2, 2, 2),
   fruit = c("banana", "lemon", "raspberry", "shugar", "cantaloupe", "plum", "lemon")
 )

 # Name the join key explicitly instead of relying on the natural-join guess.
 # NOTE(review): assumes `fruit` is the only column shared by both tables.
 full_join(fruits, recipe, by = "fruit")
 left_join(fruits, recipe, by = "fruit")
 ```

 ```{r Using dplyr: Across}
 # Six independent draws per column. With runif(3, ...) data.frame() would
 # silently recycle the three values, making groups "a" and "b" identical.
 random_numbers <- data.frame(
   group = c("a", "a", "a", "b", "b", "b"),
   a_1 = runif(6, 0, 1),
   a_2 = runif(6, 0, 1),
   a_3 = runif(6, 0, 1),
   b_1 = runif(6, 0, 1)
 )

 # Goal: Want to select only columns starting with "a_"
 random_numbers %>%
   select(a_1, a_2, a_3)

 # better: starts_with() matches the prefix only, contains() would match anywhere
 random_numbers %>%
   select(starts_with("a_"))

 # Goal: Want to round all numerics
 random_numbers %>%
   mutate(
     a_1 = round(a_1, 2),
     a_2 = round(a_2, 2),
     a_3 = round(a_3, 2),
     b_1 = round(b_1, 2)
   )

 # better: predicates inside across() must be wrapped in where()
 random_numbers %>%
   mutate(across(where(is.numeric), ~ round(.x, 2)))

 # Goal: Want to calculate grouped mean for all numerics
 random_numbers %>%
   group_by(group) %>%
   summarise(
     a_1 = mean(a_1),
     a_2 = mean(a_2),
     a_3 = mean(a_3),
     b_1 = mean(b_1)
   )

 # better:
 random_numbers %>%
   group_by(group) %>%
   summarise(across(where(is.numeric), mean))
 ```

 ```{r Illustrating analytical results by plotting techniques}
 # Load and prepare data
 Shower <- read.csv2("../../data/Shower_data.csv")
 Shower$group <- as.factor(Shower$group)
 # NOTE(review): labels are assigned in the factor's existing level order;
 # "Fourth group" before "Third group" is presumably intentional - confirm.
 levels(Shower$group) <- c(
   "First group", "Second group", "Fourth group",
   "Third group", "Fifth group", "Sixth group"
 )
 Shower$AvgTemperature <- as.numeric(Shower$AvgTemperature)

 # Scatterplot
 plot(x = Shower$AvgTemperature, y = Shower$Volume)

 # Scatterplot with formula
 plot(Shower$Volume ~ Shower$AvgTemperature)

 # Barplot mean_volume per group
 Shower_groups <- Shower %>%
   group_by(group) %>%
   summarize(mean_volume = mean(Volume, na.rm = TRUE))
 barplot(Shower_groups$mean_volume, names.arg = Shower_groups$group)

 # pie chart
 pie(Shower_groups$mean_volume)

 # boxplot
 selected_showers <- Shower[Shower$Hh_ID %in% c(7890, 4624), ]
 boxplot(selected_showers$Volume ~ selected_showers$Hh_ID)

 # line plot: one line per household, subsets computed once
 showers_7890 <- selected_showers[selected_showers$Hh_ID == 7890, ]
 showers_4624 <- selected_showers[selected_showers$Hh_ID == 4624, ]
 plot(Volume ~ Shower, data = showers_7890, type = "l", col = "blue")
 lines(Volume ~ Shower, data = showers_4624, col = "red")
 # Both lines are drawn solid, so the legend uses lty = 1 for both entries.
 legend(
   60, 120,
   legend = c("Hh_ID = 7890", "Hh_ID = 4624"),
   col = c("blue", "red"),
   lty = 1,
   cex = 0.8
 )

 # histogram
 hist(Shower$Volume, breaks = 20)

 # histogram and density line
 hist(Shower$Volume, probability = TRUE, breaks = 20)
 lines(density(na.omit(Shower$Volume)), col = "blue", lwd = 2)

 # QQPlot
 qqnorm(Shower$Volume)

 # logarithmized QQplot
 qqnorm(log(Shower$Volume))
 ```
 ---
 title: 'Tutorial 4: R Introduction 4'
 output: html_notebook
 editor_options:
   chunk_output_type: inline
 ---

 This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M), Information Systems and Energy Efficient Systems, University of Bamberg.

 ```{r Using dplyr: summarise}
 # NOTE(review): this notebook uses dplyr verbs and the data frame `fruits`
 # without creating it here - presumably it comes from the previous
 # tutorial's session; confirm before running stand-alone.

 # Overall mean weight
 summarise(fruits, mean(weight))

 # Mean weight and number of fruits per color
 fruits_grouped_color <- group_by(fruits, color)
 summarise(fruits_grouped_color, avg_weight = mean(weight), num_fruits = n())

 # Task 8

 # Task 9

 # Task 10

 ```

 ```{r Using dplyr: the piping operator}
 fruits %>%
   group_by(color) %>%
   summarise(avg_weight = mean(weight), num_fruits = n()) %>%
   arrange(avg_weight)

 # Task 11

 # Task 12

 # Task 13

 ```

 ```{r Using dplyr: Joins}
 recipe <- data.frame(
   recipeNo = c(1, 1, 1, 1, 2, 2, 2),
   fruit = c("banana", "lemon", "raspberry", "shugar", "cantaloupe", "plum", "lemon")
 )

 # Name the join key explicitly instead of relying on the natural-join guess.
 # NOTE(review): assumes `fruit` is the only column shared by both tables.
 full_join(fruits, recipe, by = "fruit")
 left_join(fruits, recipe, by = "fruit")
 ```

 ```{r Using dplyr: Across}
 # Six independent draws per column. With runif(3, ...) data.frame() would
 # silently recycle the three values, making groups "a" and "b" identical.
 random_numbers <- data.frame(
   group = c("a", "a", "a", "b", "b", "b"),
   a_1 = runif(6, 0, 1),
   a_2 = runif(6, 0, 1),
   a_3 = runif(6, 0, 1),
   b_1 = runif(6, 0, 1)
 )

 # Goal: Want to select only columns starting with "a_"
 random_numbers %>%
   select(a_1, a_2, a_3)

 # better: starts_with() matches the prefix only, contains() would match anywhere
 random_numbers %>%
   select(starts_with("a_"))

 # Goal: Want to round all numerics
 random_numbers %>%
   mutate(
     a_1 = round(a_1, 2),
     a_2 = round(a_2, 2),
     a_3 = round(a_3, 2),
     b_1 = round(b_1, 2)
   )

 # better: predicates inside across() must be wrapped in where()
 random_numbers %>%
   mutate(across(where(is.numeric), ~ round(.x, 2)))

 # Goal: Want to calculate grouped mean for all numerics
 random_numbers %>%
   group_by(group) %>%
   summarise(
     a_1 = mean(a_1),
     a_2 = mean(a_2),
     a_3 = mean(a_3),
     b_1 = mean(b_1)
   )

 # better:
 random_numbers %>%
   group_by(group) %>%
   summarise(across(where(is.numeric), mean))
 ```

 ```{r Illustrating analytical results by plotting techniques}
 # Load and prepare data
 Shower <- read.csv2("../../data/Shower_data.csv")
 Shower$group <- as.factor(Shower$group)
 # NOTE(review): labels are assigned in the factor's existing level order;
 # "Fourth group" before "Third group" is presumably intentional - confirm.
 levels(Shower$group) <- c(
   "First group", "Second group", "Fourth group",
   "Third group", "Fifth group", "Sixth group"
 )
 Shower$AvgTemperature <- as.numeric(Shower$AvgTemperature)

 # The empty calls below are exercise placeholders: fill in the arguments.

 # Scatterplot
 plot()

 # Scatterplot with formula
 plot( ~ )

 # Barplot mean_volume per group
 Shower_groups <- Shower %>%
   group_by(group) %>%
   summarize(mean_volume = mean(Volume, na.rm = TRUE))
 barplot()

 # pie chart
 pie()

 # boxplot
 selected_showers <- Shower[Shower$Hh_ID %in% c(7890, 4624), ]
 boxplot()

 # line plot
 plot()
 lines()

 # histogram
 hist()

 # histogram and density line
 hist()
 lines()

 # QQPlot
 qqnorm()

 # logarithmized QQplot
 qqnorm()
 ```
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!