Commit e8d7dd1b by Weigert, Andreas

Updated Tutorial 3

parent e18f10db
 ... ... @@ -51,7 +51,6 @@ filter(fruits, color %in% c("yellow", "red")) # Task 3 filter(fruits, color %in% c("yellow", "red") & fruit != "banana") ``` ... ... @@ -75,96 +74,3 @@ select(arrange(fruits_weight, desc(fruits_per_pound)), fruit, fruits_per_pound) source(file = "../functions/pot.R") mutate(fruits_weight, fruits_per_kg_pot = pot(fruits_per_kg)) ``` ```{r Using dplyr: summarize} summarise(fruits, mean(weight)) fruits_grouped_color <- group_by(fruits, color) summarise(fruits_grouped_color, avg_weight = mean(weight), num_fruits = n()) # Task 8 summarise(fruits_grouped_color, avg_weight = mean(weight), num_fruits = n(), sd_weigth = sd(weight), var_weight = var(weight), min_weight = min(weight), max_weight = max(weight)) # Task 9 summarize(fruits_grouped_color, fist_fruit = first(fruit), last = last(fruit), n_distinct_fruits = n_distinct(fruit)) # Task 10 ?n_distinct ``` ```{r Using dplyr: the piping operator} fruits %>% group_by(color) %>% summarise(avg_weight = mean(weight), num_fruits = n()) %>% arrange(avg_weight) # Task 11 fruits %>% filter(between(weight, 8, 100)) # Task 12 fruits_weight %>% arrange(desc(fruits_per_pound)) %>% select(fruit, fruits_per_pound) # Task 13 fruits %>% group_by(color) %>% summarise(avg_weight = mean(weight), num_fruits = n(), sd_weigth = sd(weight), var_weight = var(weight), min_weight = min(weight), max_weight = max(weight)) ``` ```{r Using dplyr: Joins} recipe <- data.frame(recipeNo=c(1,1,1,1,2,2,2), fruit=c("banana", "lemon", "raspberry", "shugar", "cantaloupe", "plum", "lemon")) full_join(fruits, recipe) left_join(fruits, recipe) ``` ```{r Illustrating analytical results by plotting techniques} # Load and prepare data Shower <- read.csv2("../../data/Shower_data.csv") Shower\$group <- as.factor(Shower\$group) levels(Shower\$group) <- c("First group", "Second group", "Fourth group", "Third group", "Fifth group", "Sixth group") Shower\$AvgTemperature <- as.numeric(Shower\$AvgTemperature) # Scatterplot plot(x = 
Shower\$AvgTemperature, y = Shower\$Volume) # Scatterplot with formula plot(Shower\$Volume ~ Shower\$AvgTemperature) # Barplot mean_volume per group Shower_groups <- Shower %>% group_by(group) %>% summarize(mean_volume = mean(Volume, na.rm = T)) barplot(Shower_groups\$mean_volume, names.arg = Shower_groups\$group) # pie chart pie(Shower_groups\$mean_volume) # boxplot selected_showers <- Shower[Shower\$Hh_ID %in% c(7890, 4624),] boxplot(selected_showers\$Volume ~ selected_showers\$Hh_ID) # line plot plot(selected_showers[selected_showers\$Hh_ID == 7890,]\$Volume ~ selected_showers[selected_showers\$Hh_ID == 7890,]\$Shower, type="l", col="blue") lines(selected_showers[selected_showers\$Hh_ID == 4624,]\$Volume ~ selected_showers[selected_showers\$Hh_ID == 4624,]\$Shower, col="red") legend(60, 120, legend=c("Hh_ID = 7890", "Hh_ID = 4624"), col=c("blue", "red"), lty=1:2, cex=0.8) # histogram hist(Shower\$Volume, breaks = 20) # histogram and density line hist(Shower\$Volume, probability = T, breaks = 20) lines(density(na.omit(Shower\$Volume)), col="blue", lwd=2) # QQPlot qqnorm(Shower\$Volume) # logarithmized QQplot qqnorm(log(Shower\$Volume)) ```
 ... ... @@ -74,93 +74,3 @@ fruits_weight <- mutate(fruits, fruits_per_kg = round(1000/weight, 1)) # Task 7 ``` ```{r Using dplyr: summarize} summarise(fruits, mean(weight)) fruits_grouped_color <- group_by(fruits, color) summarise(fruits_grouped_color, avg_weight = mean(weight), num_fruits = n()) # Task 8 # Task 9 # Task 10 ``` ```{r Using dplyr: the piping operator} fruits %>% group_by(color) %>% summarise(avg_weight = mean(weight), num_fruits = n()) %>% arrange(avg_weight) # Task 11 # Task 12 # Task 13 ``` ```{r Using dplyr: Joins} recipe <- data.frame(recipeNo=c(1,1,1,1,2,2,2), fruit=c("banana", "lemon", "raspberry", "shugar", "cantaloupe", "plum", "lemon")) full_join(fruits, recipe) left_join(fruits, recipe) ``` ```{r Illustrating analytical results by plotting techniques} # Load and prepare data Shower <- read.csv2("../data/Shower_data.csv") Shower\$group <- as.factor(Shower\$group) levels(Shower\$group) <- c("First group", "Second group", "Fourth group", "Third group", "Fifth group", "Sixth group") Shower\$AvgTemperature <- as.numeric(Shower\$AvgTemperature) # Scatterplot plot(x = Shower\$AvgTemperature, y = Shower\$Volume) # Scatterplot with formula plot(Shower\$Volume ~ Shower\$AvgTemperature) # Barplot mean_volume per group Shower_groups <- Shower %>% group_by(group) %>% summarize(mean_volume = mean(Volume, na.rm = T)) barplot(Shower_groups\$mean_volume, names.arg = Shower_groups\$group) # pie chart pie(Shower_groups\$mean_volume) # boxplot selected_showers <- Shower[Shower\$Hh_ID %in% c(7890, 4624),] boxplot(selected_showers\$Volume ~ selected_showers\$Hh_ID) # line plot plot(selected_showers[selected_showers\$Hh_ID == 7890,]\$Volume ~ selected_showers[selected_showers\$Hh_ID == 7890,]\$Shower, type="l", col="blue") lines(selected_showers[selected_showers\$Hh_ID == 4624,]\$Volume ~ selected_showers[selected_showers\$Hh_ID == 4624,]\$Shower, col="red") legend(60, 120, legend=c("Hh_ID = 7890", "Hh_ID = 4624"), col=c("blue", "red"), lty=1:2, cex=0.8) # 
histogram hist(Shower\$Volume, breaks = 20) # histogram and density line hist(Shower\$Volume, probability = T, breaks = 20) lines(density(na.omit(Shower\$Volume)), col="blue", lwd=2) # QQPlot qqnorm(Shower\$Volume) # logarithmized QQplot qqnorm(log(Shower\$Volume)) ```
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!