Commit 223601eb authored by Weigert, Andreas
Browse files

updated newsletter case

parent 45d470ab
--- ---
title: 'Tutorial 6: Newsletter Case' title: 'Tutorial 5-6: Newsletter Case'
output: html_notebook output: html_notebook
editor_options: editor_options:
chunk_output_type: inline chunk_output_type: inline
...@@ -16,20 +16,16 @@ library(lubridate) ...@@ -16,20 +16,16 @@ library(lubridate)
```{r read the files with basic R functions} ```{r read the files with basic R functions}
nl_mailsSend <- read.csv2("../../data/newsletter/newsletterData_mailsSend.csv", # Exercise 1
encoding = "UTF-8", # this is usually not needed nl_mailsSend <- read.csv2("../../data/newsletter/newsletterData_mailsSend.csv", encoding = "UTF-8")
stringsAsFactors = F # R formats columns with text as factor,
# this is not meaningful in our case nl_clicks <- read.csv2("../../data/newsletter/newsletterData_clicks.csv", encoding = "UTF-8")
) nl_links <- read.csv2("../../data/newsletter/newsletterData_links.csv", encoding = "UTF-8")
nl_clicks <- read.csv2("../../data/newsletter/newsletterData_clicks.csv", nl_opens <- read.csv2("../../data/newsletter/newsletterData_opens.csv", encoding = "UTF-8")
encoding = "UTF-8", stringsAsFactors = F)
nl_links <- read.csv2("../../data/newsletter/newsletterData_links.csv",
encoding = "UTF-8", stringsAsFactors = F)
nl_opens <- read.csv2("../../data/newsletter/newsletterData_opens.csv",
encoding = "UTF-8", stringsAsFactors = F)
``` ```
```{r inspect the data} ```{r inspect the data}
# Exercise 2
summary(nl_mailsSend) summary(nl_mailsSend)
str(nl_mailsSend) str(nl_mailsSend)
...@@ -45,14 +41,15 @@ str(nl_opens) ...@@ -45,14 +41,15 @@ str(nl_opens)
```{r format the data correctly (exercise 3-5)} ```{r format the data correctly (exercise 3-5)}
# format the nl_mailsSend dataset # format the nl_mailsSend dataset
nl_mailsSend$VID <- as.factor(nl_mailsSend$VID) nl_mailsSend$VID <- as.factor(nl_mailsSend$VID)
nl_mailsSend$EmailID <- as.factor(nl_mailsSend$EmailID) nl_mailsSend$EmailID <- as.factor(nl_mailsSend$EmailID)
nl_mailsSend$NumTipIDs <- as.factor(nl_mailsSend$NumTipIDs)
nl_mailsSend$NewsletterTitle <- as.factor(nl_mailsSend$NewsletterTitle) nl_mailsSend$NewsletterTitle <- as.factor(nl_mailsSend$NewsletterTitle)
nl_mailsSend$EnergyReport.EfficiencyLevel <- as.factor(nl_mailsSend$EnergyReport.EfficiencyLevel) nl_mailsSend$EnergyReport.Quarter <- as.factor(nl_mailsSend$EnergyReport.Quarter)
table(nl_mailsSend$EnergyReport.EfficiencyLevel) # Efficiency levels have a natural ordering
nl_mailsSend$EnergyReport.EfficiencyLevel <- factor(nl_mailsSend$EnergyReport.EfficiencyLevel, levels = c("A", "B", "C", "D", "E", "F", "G"), ordered = TRUE)
nl_mailsSend$EnergyReport.HouseholdType <- as.factor(nl_mailsSend$EnergyReport.HouseholdType) nl_mailsSend$EnergyReport.HouseholdType <- as.factor(nl_mailsSend$EnergyReport.HouseholdType)
nl_mailsSend$EnergyReport.HouseholdMembers <- as.factor(nl_mailsSend$EnergyReport.HouseholdMembers) nl_mailsSend$EnergyReport.HouseholdMembers <- factor(nl_mailsSend$EnergyReport.HouseholdMembers, levels = c("3-5", "6+"), ordered = T) # Classes of household members have a natural ordering
# format the nl_clicks dataset # format the nl_clicks dataset
...@@ -187,6 +184,8 @@ boxplot(nl_mailsSend$EnergyReport.Cons[nl_mailsSend$EnergyReport.Cons<3000], hor ...@@ -187,6 +184,8 @@ boxplot(nl_mailsSend$EnergyReport.Cons[nl_mailsSend$EnergyReport.Cons<3000], hor
# exercise 27 # exercise 27
hist(nl_mailsSend$EnergyReport.Cons, breaks = 30) hist(nl_mailsSend$EnergyReport.Cons, breaks = 30)
``` ```
```{r histogram with additional lines - exercise 28 + 29} ```{r histogram with additional lines - exercise 28 + 29}
# exercise 27-29 # exercise 27-29
hist(nl_mailsSend$EnergyReport.Cons, hist(nl_mailsSend$EnergyReport.Cons,
...@@ -197,25 +196,37 @@ lines(density(nl_mailsSend$EnergyReport.Cons), col=2) ...@@ -197,25 +196,37 @@ lines(density(nl_mailsSend$EnergyReport.Cons), col=2)
abline(v=quantile(nl_mailsSend$EnergyReport.Cons, probs = c(0.25,0.5,0.75)), col=3) abline(v=quantile(nl_mailsSend$EnergyReport.Cons, probs = c(0.25,0.5,0.75)), col=3)
``` ```
```{r bar and pie charts}
# exercise 31
x <- table(as_date(nl_mailsSend$SendDate))
barplot(x) # barplot needs a frequency table
```{r bar and pie charts with colors (exercise 30 extended)} # exercise 32
library(RColorBrewer) library(RColorBrewer)
mycolors <- brewer.pal(4, "Dark2") mycolors <- brewer.pal(length(x), "Dark2")
barplot(table(as_date(nl_mailsSend$SendDate)), # barplot needs a frequency table barplot(x, # barplot needs a frequency table
main="Number of emails sent per newsletter", # the plot title main="Number of emails sent per newsletter", # the plot title
horiz = T, # horizontal barplot horiz = T, # horizontal barplot
las=1, # aligns the axis labels to the reading direction las=1, # aligns the axis labels to the reading direction
cex.names = 0.6, # adjust size of the labels cex.names = 0.6, # adjust size of the labels
col=mycolors[c(1,1,1,1,2,3,4)]) # set the colors col=mycolors) # set the colors
# exercise 33
x <- round(prop.table(table(nl_mailsSend$EnergyReport.EfficiencyLevel))*100,2)
pie(x = x)
# exercise 34
pie(x = x, labels = paste0(names(x), "(", x, "%)"), col = brewer.pal(n=length(x), name="Set1"))
legend("topright", names(x), cex = 0.8, fill = brewer.pal(n=length(x), name="Set1"))
pie(table(as_date(nl_mailsSend$SendDate)),
col=mycolors[c(1,1,1,1,2,3,4)],
main="Number of emails sent per newsletter")
``` ```
```{r plot showing email opens and clicks over time}
```{r Homework task: Plot showing email opens and clicks over time}
# identify all opens per day # identify all opens per day
actions_open <- nl_opens %>% actions_open <- nl_opens %>%
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment