Commit 34b476b8 authored by Weigert, Andreas's avatar Weigert, Andreas
Browse files

R Intro 3 added

parent cb9bd62c
---
title: 'Tutorial 3: R Introduction 3'
output: html_notebook
editor_options:
  chunk_output_type: inline
---
This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M), Information Systems and Energy Efficient Systems, University of Bamberg.
```{r Functions}
# Evaluate the helper script so that pot() becomes available in this session,
# then call it once with the arguments 2 and 3.
source("../R/functions/pot.R")
pot(2, 3)
```
```{r Debugging}
# Debugging demo: f() -> g() -> h() -> i() forms a four-level call chain, so
# the error raised inside i() happens four calls deep.
f <- function(a) g(a)
g <- function(b) h(b)
h <- function(c) i(c)
i <- function(d) {
d = d*d
# Uncomment browser() to pause execution here and inspect `d` interactively.
#browser()
# Adding a character string to a number raises
# "non-numeric argument to binary operator" -- presumably the deliberate
# failure this chunk is meant to investigate.
"a" + d
# Not reached as long as the line above fails.
return(d)
}
# Triggers the error; traceback() afterwards lists the calls leading to it.
f(10)
```
```{r Using dplyr: Create data and load package}
library(dplyr)
# Example data set: one row per fruit with its colour and weight.
fruits <- data.frame(
  fruit = c(
    "banana", "cantaloupe", "apple", "raspberry", "plum", "cherry", "lemon"
  ),
  color = c("yellow", "yellow", "red", "red", "purple", "red", "yellow"),
  weight = c(150, 1330, 150, 4, 100, 8, 80)
)
```
```{r Using dplyr: Filter data}
# Keep the yellow fruits whose weight is below 1000. Several conditions passed
# to filter() are combined with a logical AND.
filter(fruits, color == "yellow", weight < 1000)
# Task 1
# Task 2
# Task 3
```
```{r Using dplyr: Select, arrange and mutate data}
# Columns: keep only the fruit name and its weight.
select(fruits, c(fruit, weight))
# Rows: sort ascending by weight.
arrange(fruits, weight)
# New column: 1000 / weight, rounded to one decimal, stored as fruits_per_kg.
fruits_weight <- mutate(
  fruits,
  fruits_per_kg = round(1000 / weight, digits = 1)
)
# Task 4
# Task 5
# Task 6
# Task 7
```
```{r Using dplyr: summarize}
# Overall mean weight across all fruits (single-row result).
summarise(fruits, mean(weight))
# Group the rows by colour and keep the grouped table for later use.
fruits_grouped_color <- group_by(fruits, color)
# One row per colour: average weight and number of fruits in that group.
summarise(
  fruits_grouped_color,
  avg_weight = mean(weight),
  num_fruits = n()
)
# Task 8
# Task 9
# Task 10
```
```{r Using dplyr: the piping operator}
# Per-colour average weight and fruit count as one pipeline; each step receives
# the previous result as its first argument. Output is sorted by avg_weight.
fruits %>%
  group_by(color) %>%
  summarise(
    avg_weight = mean(weight),
    num_fruits = n()
  ) %>%
  arrange(avg_weight)
# Task 11
# Task 12
# Task 13
```
```{r Using dplyr: Joins}
# Recipe ingredients: one row per (recipe number, ingredient) pair.
# NOTE(review): "shugar" looks like a typo for "sugar"; left unchanged because
# it is data and matches no row in `fruits` either way.
recipe <- data.frame(
  recipeNo = c(1, 1, 1, 1, 2, 2, 2),
  fruit = c(
    "banana", "lemon", "raspberry", "shugar", "cantaloupe", "plum", "lemon"
  )
)
# State the join key explicitly. Without `by`, dplyr joins on every column
# name the two tables share and prints a message; that would silently change
# the key if a shared column were added to either table later.
# full_join() keeps all rows from both tables.
full_join(fruits, recipe, by = "fruit")
# left_join() keeps all rows of `fruits` and adds matching recipe rows.
left_join(fruits, recipe, by = "fruit")
```
```{r Illustrating analytical results by plotting techniques}
# Load and prepare data
# read.csv2() reads files that use ';' as field separator and ',' as decimal
# mark.
Shower <- read.csv2("../data/Shower_data.csv")
# Treat the group column as a categorical variable.
Shower$group <- as.factor(Shower$group)
# Relabel the levels by position: the i-th existing level gets the i-th label.
# NOTE(review): "Fourth group" is listed before "Third group" -- confirm this
# matches the order of the original levels and is not a mix-up.
levels(Shower$group) <- c("First group", "Second group", "Fourth group",
"Third group", "Fifth group", "Sixth group")
# Coerce the temperature column to numeric.
# NOTE(review): if the column was read as a factor, as.numeric() returns the
# internal level codes rather than the printed values -- confirm its type.
Shower$AvgTemperature <- as.numeric(Shower$AvgTemperature)
# Scatterplot with explicit x and y vectors
plot(x = Shower$AvgTemperature, y = Shower$Volume)
# Scatterplot with the formula interface (y ~ x)
plot(Shower$Volume ~ Shower$AvgTemperature)
# Barplot: mean volume per group (missing volumes are ignored)
Shower_groups <- Shower %>%
  group_by(group) %>%
  summarise(mean_volume = mean(Volume, na.rm = TRUE))
barplot(Shower_groups$mean_volume, names.arg = Shower_groups$group)
# Pie chart of the same means; label the slices with the group names so the
# chart can be read like the bar plot (default labels are just 1, 2, ...).
pie(Shower_groups$mean_volume, labels = as.character(Shower_groups$group))
# Boxplot: volume distribution of two selected households
selected_showers <- Shower[Shower$Hh_ID %in% c(7890, 4624), ]
boxplot(selected_showers$Volume ~ selected_showers$Hh_ID)
# Line plot: volume over the Shower column for two households.
# Subset each household once instead of repeating the row filter in every
# plotting call.
showers_7890 <- selected_showers[selected_showers$Hh_ID == 7890, ]
showers_4624 <- selected_showers[selected_showers$Hh_ID == 4624, ]
# Explicit axis labels replace the default labels derived from the formula.
plot(
  showers_7890$Volume ~ showers_7890$Shower,
  type = "l", col = "blue", xlab = "Shower", ylab = "Volume"
)
lines(showers_4624$Volume ~ showers_4624$Shower, col = "red")
# Both series are drawn as solid lines, so the legend must use lty = 1 for
# both entries (lty = 1:2 would show the red line as dashed).
legend(
  60, 120,
  legend = c("Hh_ID = 7890", "Hh_ID = 4624"),
  col = c("blue", "red"), lty = 1, cex = 0.8
)
# Histogram of shower volume (about 20 bins requested)
hist(Shower$Volume, breaks = 20)
# Histogram on the density scale with a kernel density estimate on top.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
hist(Shower$Volume, probability = TRUE, breaks = 20)
# density() does not accept missing values by default, so drop them first.
lines(density(na.omit(Shower$Volume)), col = "blue", lwd = 2)
# Normal QQ plot of the raw volumes
qqnorm(Shower$Volume)
# Normal QQ plot of the log-transformed volumes
qqnorm(log(Shower$Volume))
```
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment