Commit 223601eb authored by Weigert, Andreas
Browse files

updated newsletter case

parent 45d470ab
---
title: 'Tutorial 6: Newsletter Case'
title: 'Tutorial 5-6: Newsletter Case'
output: html_notebook
editor_options:
  chunk_output_type: inline
......@@ -16,20 +16,16 @@ library(lubridate)
```{r read the files with basic R functions}
# Exercise 1
# Read the four newsletter CSV exports, each file exactly once.
# read.csv2() is the read.csv() variant for files that use ";" as the field
# separator and "," as the decimal mark.
# stringsAsFactors = FALSE is spelled out so text columns stay character
# vectors on every R version (the default only became FALSE in R 4.0.0);
# the columns that should be factors are converted explicitly later on.
nl_mailsSend <- read.csv2(
  "../../data/newsletter/newsletterData_mailsSend.csv",
  encoding = "UTF-8", # this is usually not needed
  stringsAsFactors = FALSE
)
nl_clicks <- read.csv2(
  "../../data/newsletter/newsletterData_clicks.csv",
  encoding = "UTF-8",
  stringsAsFactors = FALSE
)
nl_links <- read.csv2(
  "../../data/newsletter/newsletterData_links.csv",
  encoding = "UTF-8",
  stringsAsFactors = FALSE
)
nl_opens <- read.csv2(
  "../../data/newsletter/newsletterData_opens.csv",
  encoding = "UTF-8",
  stringsAsFactors = FALSE
)
```
```{r inspect the data}
# Exercise 2
# Get a first overview of the sent-mails table.
# summary() prints per-column summary statistics.
summary(nl_mailsSend)
# str() prints the compact structure: column names, types and first values.
str(nl_mailsSend)
......@@ -45,14 +41,15 @@ str(nl_opens)
```{r format the data correctly (exercise 3-5)}
# format the nl_mailsSend dataset
# Identifier and category columns are converted to factors so that they are
# treated as categorical values instead of free text or numbers.
nl_mailsSend$VID <- as.factor(nl_mailsSend$VID)
nl_mailsSend$EmailID <- as.factor(nl_mailsSend$EmailID)
nl_mailsSend$NumTipIDs <- as.factor(nl_mailsSend$NumTipIDs)
nl_mailsSend$NewsletterTitle <- as.factor(nl_mailsSend$NewsletterTitle)
nl_mailsSend$EnergyReport.Quarter <- as.factor(nl_mailsSend$EnergyReport.Quarter)
nl_mailsSend$EnergyReport.HouseholdType <- as.factor(nl_mailsSend$EnergyReport.HouseholdType)

# Efficiency levels have a natural ordering (A is the first level, G the last),
# so they become an ordered factor with explicitly listed levels.
table(nl_mailsSend$EnergyReport.EfficiencyLevel) # inspect the observed levels
nl_mailsSend$EnergyReport.EfficiencyLevel <- factor(
  nl_mailsSend$EnergyReport.EfficiencyLevel,
  levels = c("A", "B", "C", "D", "E", "F", "G"),
  ordered = TRUE
)

# Classes of household members have a natural ordering as well.
# NOTE(review): factor() turns every value that is not listed in `levels`
# into NA. Confirm that "3-5" and "6+" are the only classes in the data
# (e.g. that no smaller class such as "1-2" exists).
nl_mailsSend$EnergyReport.HouseholdMembers <- factor(
  nl_mailsSend$EnergyReport.HouseholdMembers,
  levels = c("3-5", "6+"),
  ordered = TRUE
)
# format the nl_clicks dataset
......@@ -187,6 +184,8 @@ boxplot(nl_mailsSend$EnergyReport.Cons[nl_mailsSend$EnergyReport.Cons<3000], hor
# exercise 27
# Histogram of the EnergyReport.Cons column. A single number for `breaks` is
# only a suggestion to hist(), so the plot has roughly (not exactly) 30 bins.
hist(nl_mailsSend$EnergyReport.Cons, breaks = 30)
```
```{r histogram with additional lines - exercise 28 + 29}
# exercise 27-29
hist(nl_mailsSend$EnergyReport.Cons,
......@@ -197,25 +196,37 @@ lines(density(nl_mailsSend$EnergyReport.Cons), col=2)
# Add vertical lines at the 25%, 50% and 75% quantiles of EnergyReport.Cons
# to the current plot, drawn in palette colour 3.
abline(v=quantile(nl_mailsSend$EnergyReport.Cons, probs = c(0.25,0.5,0.75)), col=3)
```
```{r bar and pie charts}
# exercise 31
# Frequency table: number of emails per send date.
x <- table(as_date(nl_mailsSend$SendDate))
barplot(x) # barplot needs a frequency table

# exercise 32
library(RColorBrewer)
# One colour per bar, taken from the qualitative "Dark2" palette.
mycolors <- brewer.pal(length(x), "Dark2")
barplot(x, # barplot needs a frequency table
  main = "Number of emails sent per newsletter", # the plot title
  horiz = TRUE, # horizontal barplot
  las = 1, # aligns the axis labels to the reading direction
  cex.names = 0.6, # adjust size of the labels
  col = mycolors # set the colors
)

# exercise 33
# Share of each efficiency level in percent, rounded to two decimals.
x <- round(prop.table(table(nl_mailsSend$EnergyReport.EfficiencyLevel)) * 100, 2)
pie(x = x)

# exercise 34
# Same pie chart with percentage labels, colours and a legend; the palette is
# computed once so that the slices and the legend are guaranteed to match.
pie_colors <- brewer.pal(n = length(x), name = "Set1")
pie(x = x, labels = paste0(names(x), "(", x, "%)"), col = pie_colors)
legend("topright", names(x), cex = 0.8, fill = pie_colors)

# Pie chart of the emails per send date; the index vector gives the first
# four slices the same colour and each remaining slice its own colour.
pie(table(as_date(nl_mailsSend$SendDate)),
  col = mycolors[c(1, 1, 1, 1, 2, 3, 4)],
  main = "Number of emails sent per newsletter"
)
```
```{r plot showing email opens and clicks over time}
```{r Homework task: Plot showing email opens and clicks over time}
# identify all opens per day
actions_open <- nl_opens %>%
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment