Commit e8d7dd1b authored by Weigert, Andreas
Browse files

Updated Tutorial 3

parent e18f10db
......@@ -51,7 +51,6 @@ filter(fruits, color %in% c("yellow", "red"))
# Task 3: keep yellow or red fruits, excluding the banana.
# Conditions passed to filter() as separate arguments are combined with AND.
filter(
  fruits,
  color %in% c("yellow", "red"),
  fruit != "banana"
)
```
......@@ -75,96 +74,3 @@ select(arrange(fruits_weight, desc(fruits_per_pound)), fruit, fruits_per_pound)
# Run the helper script (expected to define pot(), which is used below),
# then add a column holding pot() applied to fruits_per_kg.
source("../functions/pot.R")
mutate(
  .data = fruits_weight,
  fruits_per_kg_pot = pot(fruits_per_kg)
)
```
```{r Using dplyr: summarize}
# Mean weight over all fruits (single-row result).
summarise(fruits, mean(weight))

# Group by color once and reuse the grouped data for the tasks below.
fruits_grouped_color <- group_by(fruits, color)
summarise(fruits_grouped_color, avg_weight = mean(weight), num_fruits = n())

# Task 8: further per-color weight statistics.
summarise(
  fruits_grouped_color,
  avg_weight = mean(weight),
  num_fruits = n(),
  sd_weight = sd(weight),
  var_weight = var(weight),
  min_weight = min(weight),
  max_weight = max(weight)
)

# Task 9: first fruit, last fruit and number of distinct fruits per color.
# The result columns carry a *_fruit suffix so that no column is called
# `last`, which would shadow the last() function.
summarise(
  fruits_grouped_color,
  first_fruit = first(fruit),
  last_fruit = last(fruit),
  n_distinct_fruits = n_distinct(fruit)
)

# Task 10: open the help page of n_distinct().
?n_distinct
```
```{r Using dplyr: the piping operator}
# Average weight and number of fruits per color, ordered by average weight.
fruits %>%
  group_by(color) %>%
  summarise(avg_weight = mean(weight), num_fruits = n()) %>%
  arrange(avg_weight)

# Task 11: fruits with a weight between 8 and 100.
fruits %>%
  filter(between(weight, 8, 100))

# Task 12: fruit name and fruits_per_pound, largest fruits_per_pound first.
fruits_weight %>%
  arrange(desc(fruits_per_pound)) %>%
  select(fruit, fruits_per_pound)

# Task 13: per-color weight statistics written as a pipeline.
fruits %>%
  group_by(color) %>%
  summarise(
    avg_weight = mean(weight),
    num_fruits = n(),
    sd_weight = sd(weight),
    var_weight = var(weight),
    min_weight = min(weight),
    max_weight = max(weight)
  )
```
```{r Using dplyr: Joins}
# Recipes in long format: one row per (recipe number, ingredient) pair.
recipe <- data.frame(
  recipeNo = c(1, 1, 1, 1, 2, 2, 2),
  fruit = c(
    "banana", "lemon", "raspberry", "shugar",
    "cantaloupe", "plum", "lemon"
  )
)

# Name the join key explicitly instead of relying on the natural join, which
# derives the key from the shared column names and prints a message.
# full_join() keeps the rows of both tables, left_join() all rows of fruits.
full_join(fruits, recipe, by = "fruit")
left_join(fruits, recipe, by = "fruit")
```
```{r Illustrating analytical results by plotting techniques}
# Load and prepare data
# read.csv2() reads ";"-separated files that use "," as the decimal mark.
Shower <- read.csv2("../../data/Shower_data.csv")
Shower$group <- as.factor(Shower$group)
# NOTE(review): the labels are assigned to the factor levels in their
# existing order; "Fourth group" is listed before "Third group" -- confirm
# that this matches the coding of the raw group values.
levels(Shower$group) <- c(
  "First group", "Second group", "Fourth group",
  "Third group", "Fifth group", "Sixth group"
)
Shower$AvgTemperature <- as.numeric(Shower$AvgTemperature)

# Scatterplot
plot(x = Shower$AvgTemperature, y = Shower$Volume)

# Scatterplot with formula
plot(Shower$Volume ~ Shower$AvgTemperature)

# Barplot of mean_volume per group
Shower_groups <- Shower %>%
  group_by(group) %>%
  summarize(mean_volume = mean(Volume, na.rm = TRUE))
barplot(Shower_groups$mean_volume, names.arg = Shower_groups$group)

# Pie chart
pie(Shower_groups$mean_volume)

# Boxplot of the volume for two selected households
selected_showers <- Shower[Shower$Hh_ID %in% c(7890, 4624), ]
boxplot(selected_showers$Volume ~ selected_showers$Hh_ID)

# Line plot: volume over the Shower column, one line per household.
# Subsetting once per household keeps the plotting calls short and gives
# readable axis labels ("Shower", "Volume").
showers_7890 <- selected_showers[selected_showers$Hh_ID == 7890, ]
showers_4624 <- selected_showers[selected_showers$Hh_ID == 4624, ]
plot(Volume ~ Shower, data = showers_7890, type = "l", col = "blue")
lines(Volume ~ Shower, data = showers_4624, col = "red")
# Both curves are drawn with the default solid line type, so the legend
# uses lty = 1 for both entries.
legend(
  60, 120,
  legend = c("Hh_ID = 7890", "Hh_ID = 4624"),
  col = c("blue", "red"), lty = 1, cex = 0.8
)

# Histogram
hist(Shower$Volume, breaks = 20)

# Histogram on the density scale with an overlaid kernel density line
hist(Shower$Volume, probability = TRUE, breaks = 20)
lines(density(na.omit(Shower$Volume)), col = "blue", lwd = 2)

# QQ plot
qqnorm(Shower$Volume)

# QQ plot of the log-transformed volume
qqnorm(log(Shower$Volume))
```
......@@ -74,93 +74,3 @@ fruits_weight <- mutate(fruits, fruits_per_kg = round(1000/weight, 1))
# Task 7
```
```{r Using dplyr: summarize}
# Mean weight over all fruits (single-row result).
summarise(.data = fruits, mean(weight))

# Group the fruits by color, then summarise each color group.
fruits_grouped_color <- group_by(.data = fruits, color)
summarise(
  .data = fruits_grouped_color,
  avg_weight = mean(weight),
  num_fruits = n()
)
# Task 8
# Task 9
# Task 10
```
```{r Using dplyr: the piping operator}
# Chain the verbs with the pipe: group by color, compute the average weight
# and the number of fruits per color, then order by average weight.
fruits %>%
  group_by(color) %>%
  summarise(
    avg_weight = mean(weight),
    num_fruits = n()
  ) %>%
  arrange(avg_weight)
# Task 11
# Task 12
# Task 13
```
```{r Using dplyr: Joins}
# Recipes in long format: one row per (recipe number, ingredient) pair.
recipe <- data.frame(
  recipeNo = c(1, 1, 1, 1, 2, 2, 2),
  fruit = c(
    "banana", "lemon", "raspberry", "shugar",
    "cantaloupe", "plum", "lemon"
  )
)

# Name the join key explicitly instead of relying on the natural join, which
# derives the key from the shared column names and prints a message.
# full_join() keeps the rows of both tables, left_join() all rows of fruits.
full_join(fruits, recipe, by = "fruit")
left_join(fruits, recipe, by = "fruit")
```
```{r Illustrating analytical results by plotting techniques}
# Load and prepare data
# read.csv2() reads ";"-separated files that use "," as the decimal mark.
Shower <- read.csv2("../data/Shower_data.csv")
Shower$group <- as.factor(Shower$group)
# NOTE(review): the labels are assigned to the factor levels in their
# existing order; "Fourth group" is listed before "Third group" -- confirm
# that this matches the coding of the raw group values.
levels(Shower$group) <- c(
  "First group", "Second group", "Fourth group",
  "Third group", "Fifth group", "Sixth group"
)
Shower$AvgTemperature <- as.numeric(Shower$AvgTemperature)

# Scatterplot
plot(x = Shower$AvgTemperature, y = Shower$Volume)

# Scatterplot with formula
plot(Shower$Volume ~ Shower$AvgTemperature)

# Barplot of mean_volume per group
Shower_groups <- Shower %>%
  group_by(group) %>%
  summarize(mean_volume = mean(Volume, na.rm = TRUE))
barplot(Shower_groups$mean_volume, names.arg = Shower_groups$group)

# Pie chart
pie(Shower_groups$mean_volume)

# Boxplot of the volume for two selected households
selected_showers <- Shower[Shower$Hh_ID %in% c(7890, 4624), ]
boxplot(selected_showers$Volume ~ selected_showers$Hh_ID)

# Line plot: volume over the Shower column, one line per household.
# Subsetting once per household keeps the plotting calls short and gives
# readable axis labels ("Shower", "Volume").
showers_7890 <- selected_showers[selected_showers$Hh_ID == 7890, ]
showers_4624 <- selected_showers[selected_showers$Hh_ID == 4624, ]
plot(Volume ~ Shower, data = showers_7890, type = "l", col = "blue")
lines(Volume ~ Shower, data = showers_4624, col = "red")
# Both curves are drawn with the default solid line type, so the legend
# uses lty = 1 for both entries.
legend(
  60, 120,
  legend = c("Hh_ID = 7890", "Hh_ID = 4624"),
  col = c("blue", "red"), lty = 1, cex = 0.8
)

# Histogram
hist(Shower$Volume, breaks = 20)

# Histogram on the density scale with an overlaid kernel density line
hist(Shower$Volume, probability = TRUE, breaks = 20)
lines(density(na.omit(Shower$Volume)), col = "blue", lwd = 2)

# QQ plot
qqnorm(Shower$Volume)

# QQ plot of the log-transformed volume
qqnorm(log(Shower$Volume))
```
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment