Commit 89ed967c authored by Weigert, Andreas
Browse files

Changed file paths for data load and write functions to match the new file structure.

parent 8ba69660
...@@ -11,7 +11,7 @@ This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M) ...@@ -11,7 +11,7 @@ This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M)
# Read the APC dataset # Read the APC dataset
```{r Read data} ```{r Read data}
APC <- read.csv2("../data/APC-dataset-anonym.csv") APC <- read.csv2("../../data/APC-dataset-anonym.csv")
``` ```
...@@ -175,7 +175,7 @@ The observations outside the whiskers are drawn as outliers in the boxplot ...@@ -175,7 +175,7 @@ The observations outside the whiskers are drawn as outliers in the boxplot
## Identification of outliers with IQR and sigma function ## Identification of outliers with IQR and sigma function
```{r} ```{r}
# Task 11: Identify outliers with IQR and sigma functions # Task 11: Identify outliers with IQR and sigma functions
source("../R/functions/outlier_identification.R") source("../functions/outlier_identification.R")
# apply the functions # apply the functions
......
...@@ -11,7 +11,7 @@ This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M) ...@@ -11,7 +11,7 @@ This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M)
# Read the APC dataset # Read the APC dataset
```{r Read data} ```{r Read data}
APC <- read.csv2("../data/APC-dataset-anonym.csv") APC <- read.csv2("../../data/APC-dataset-anonym.csv")
``` ```
...@@ -115,7 +115,7 @@ APC <- read.csv2("../data/APC-dataset-anonym.csv") ...@@ -115,7 +115,7 @@ APC <- read.csv2("../data/APC-dataset-anonym.csv")
## Identification of outliers with IQR and sigma function ## Identification of outliers with IQR and sigma function
```{r} ```{r}
# Task 11: Identify outliers with IQR and sigma functions # Task 11: Identify outliers with IQR and sigma functions
source("../R/functions/outlier_identification.R") source("../functions/outlier_identification.R")
# apply the functions # apply the functions
......
...@@ -11,7 +11,7 @@ This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M) ...@@ -11,7 +11,7 @@ This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M)
##Exercise 1 ##Exercise 1
```{r} ```{r}
# 1. Load the Data. # 1. Load the Data.
trips <- read.csv2(file="BIA_GPS_TIME_SERIES_1.csv", header=TRUE) trips <- read.csv2(file="../../data/emobility/BIA_GPS_TIME_SERIES_1.csv", header=TRUE)
``` ```
......
...@@ -18,11 +18,11 @@ library(readr) ...@@ -18,11 +18,11 @@ library(readr)
```{r Read and prepare data} ```{r Read and prepare data}
# read data # read data
consumption <- read_csv(file="../data/clustering/bam_energy_report_consumption.csv", na = "NULL") consumption <- read_csv(file="../../data/clustering/bam_energy_report_consumption.csv", na = "NULL")
customer <- read_csv(file="../data/clustering/bam_energy_report_customers.csv", na = "NULL") customer <- read_csv(file="../../data/clustering/bam_energy_report_customers.csv", na = "NULL")
logins <- read_csv(file="../data/clustering/bam_energy_report_logins.csv", na = "NULL") logins <- read_csv(file="../../data/clustering/bam_energy_report_logins.csv", na = "NULL")
survey <- read_csv(file="../data/clustering/bam_energy_report_survey.csv", na = "NULL") survey <- read_csv(file="../../data/clustering/bam_energy_report_survey.csv", na = "NULL")
portal_points <- read_csv(file="../data/clustering/bam_energy_report_portal_points.csv", na = "NULL") portal_points <- read_csv(file="../../data/clustering/bam_energy_report_portal_points.csv", na = "NULL")
# convert data # convert data
consumption$CustomerID <- as.character(consumption$CustomerID) consumption$CustomerID <- as.character(consumption$CustomerID)
......
...@@ -18,11 +18,11 @@ library(readr) ...@@ -18,11 +18,11 @@ library(readr)
```{r Read and prepare data} ```{r Read and prepare data}
# read data # read data
consumption <- read_csv(file="../data/clustering/bam_energy_report_consumption.csv", na = "NULL") consumption <- read_csv(file="../../data/clustering/bam_energy_report_consumption.csv", na = "NULL")
customer <- read_csv(file="../data/clustering/bam_energy_report_customers.csv", na = "NULL") customer <- read_csv(file="../../data/clustering/bam_energy_report_customers.csv", na = "NULL")
logins <- read_csv(file="../data/clustering/bam_energy_report_logins.csv", na = "NULL") logins <- read_csv(file="../../data/clustering/bam_energy_report_logins.csv", na = "NULL")
survey <- read_csv(file="../data/clustering/bam_energy_report_survey.csv", na = "NULL") survey <- read_csv(file="../../data/clustering/bam_energy_report_survey.csv", na = "NULL")
portal_points <- read_csv(file="../data/clustering/bam_energy_report_portal_points.csv", na = "NULL") portal_points <- read_csv(file="../../data/clustering/bam_energy_report_portal_points.csv", na = "NULL")
# convert data # convert data
consumption$CustomerID <- as.character(consumption$CustomerID) consumption$CustomerID <- as.character(consumption$CustomerID)
......
...@@ -20,7 +20,7 @@ library(randomForest) #further random forest ...@@ -20,7 +20,7 @@ library(randomForest) #further random forest
```{r Load and prepare data} ```{r Load and prepare data}
# Load data # Load data
load("../data/classification.RData") load("../../data/classification.RData")
# Derive and investigate the dependent variable "number of residents" # Derive and investigate the dependent variable "number of residents"
adults <- as.integer(ifelse(customers$residents.numAdult=="5 oder mehr", adults <- as.integer(ifelse(customers$residents.numAdult=="5 oder mehr",
......
...@@ -19,7 +19,7 @@ library(dplyr) #for data wrangling ...@@ -19,7 +19,7 @@ library(dplyr) #for data wrangling
```{r Load and prepare data} ```{r Load and prepare data}
# Load data # Load data
load("../data/classification2.RData") load("../../data/classification2.RData")
# Derive and investigate the dependent variable "number of residents" # Derive and investigate the dependent variable "number of residents"
adults <- as.integer(ifelse(customers$residents.numAdult=="5 oder mehr", adults <- as.integer(ifelse(customers$residents.numAdult=="5 oder mehr",
......
...@@ -19,7 +19,7 @@ library(dplyr) #for data wrangling ...@@ -19,7 +19,7 @@ library(dplyr) #for data wrangling
```{r Load and prepare data} ```{r Load and prepare data}
# Load data # Load data
load("../data/classification2.RData") load("../../data/classification2.RData")
# Derive and investigate the dependent variable "number of residents" # Derive and investigate the dependent variable "number of residents"
adults <- as.integer(ifelse(customers$residents.numAdult=="5 oder mehr", adults <- as.integer(ifelse(customers$residents.numAdult=="5 oder mehr",
......
...@@ -16,16 +16,16 @@ library(lubridate) ...@@ -16,16 +16,16 @@ library(lubridate)
```{r read the files with basic R functions} ```{r read the files with basic R functions}
nl_mailsSend <- read.csv2("../data/newsletter/newsletterData_mailsSend.csv", nl_mailsSend <- read.csv2("../../data/newsletter/newsletterData_mailsSend.csv",
encoding = "UTF-8", # this is usually not needed encoding = "UTF-8", # this is usually not needed
stringsAsFactors = F # R formats columns with text as factor, stringsAsFactors = F # R formats columns with text as factor,
# this is not meaningful in our case # this is not meaningful in our case
) )
nl_clicks <- read.csv2("../data/newsletter/newsletterData_clicks.csv", nl_clicks <- read.csv2("../../data/newsletter/newsletterData_clicks.csv",
encoding = "UTF-8", stringsAsFactors = F) encoding = "UTF-8", stringsAsFactors = F)
nl_links <- read.csv2("../data/newsletter/newsletterData_links.csv", nl_links <- read.csv2("../../data/newsletter/newsletterData_links.csv",
encoding = "UTF-8", stringsAsFactors = F) encoding = "UTF-8", stringsAsFactors = F)
nl_opens <- read.csv2("../data/newsletter/newsletterData_opens.csv", nl_opens <- read.csv2("../../data/newsletter/newsletterData_opens.csv",
encoding = "UTF-8", stringsAsFactors = F) encoding = "UTF-8", stringsAsFactors = F)
``` ```
......
...@@ -12,7 +12,7 @@ This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M) ...@@ -12,7 +12,7 @@ This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M)
# Task 2 # Task 2
# Read data. Remember the relative path # Read data. Remember the relative path
Shower <- read.csv2("../data/Shower_data.csv") Shower <- read.csv2("../../data/Shower_data.csv")
?read.csv #help pages for format options of the data file ?read.csv #help pages for format options of the data file
...@@ -84,10 +84,10 @@ quantile(Shower_clean$ShowerTime) ...@@ -84,10 +84,10 @@ quantile(Shower_clean$ShowerTime)
```{r Write and filter data} ```{r Write and filter data}
# Task 12 # Task 12
write.csv2(x = Shower[Shower$Hh_ID == 8899,], file="../output/problematic_shower_data.csv") write.csv2(x = Shower[Shower$Hh_ID == 8899,], file="../../output/problematic_shower_data.csv")
# Task 13 # Task 13
write.csv2(x = Shower[Shower$Hh_ID != 8899,], file="../output/cleaned_shower_data.csv") write.csv2(x = Shower[Shower$Hh_ID != 8899,], file="../../output/cleaned_shower_data.csv")
``` ```
After cleaning data we have stored the data to the folder "output". After cleaning data we have stored the data to the folder "output".
......
...@@ -7,7 +7,7 @@ editor_options: ...@@ -7,7 +7,7 @@ editor_options:
This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M), Information Systems and Energy Efficient Systems, University of Bamberg. This file is part of the lecture Business Intelligence & Analytics (EESYS-BIA-M), Information Systems and Energy Efficient Systems, University of Bamberg.
```{r Solution for Cooldown exercise} ```{r Solution for Cooldown exercise}
Shower <- read.csv2("../data/Shower_data.csv") Shower <- read.csv2("../../data/Shower_data.csv")
summary(Shower) summary(Shower)
Shower$group <- as.factor(Shower$group) Shower$group <- as.factor(Shower$group)
...@@ -20,7 +20,7 @@ tapply(Shower_more_than_ten$Volume, Shower_more_than_ten$group, mean, na.rm=T) ...@@ -20,7 +20,7 @@ tapply(Shower_more_than_ten$Volume, Shower_more_than_ten$group, mean, na.rm=T)
```{r Functions} ```{r Functions}
source(file = "../R/functions/pot.R") source(file = "../functions/pot.R")
pot(2,3) pot(2,3)
``` ```
...@@ -83,7 +83,7 @@ fruits_weight <- mutate(fruits_weight, fruits_per_pound = round(fruits_per_kg * ...@@ -83,7 +83,7 @@ fruits_weight <- mutate(fruits_weight, fruits_per_pound = round(fruits_per_kg *
select(arrange(fruits_weight, desc(fruits_per_pound)), fruit, fruits_per_pound) select(arrange(fruits_weight, desc(fruits_per_pound)), fruit, fruits_per_pound)
# Task 7 # Task 7
source(file = "../R/functions/pot.R") source(file = "../functions/pot.R")
mutate(fruits_weight, fruits_per_kg_pot = pot(fruits_per_kg)) mutate(fruits_weight, fruits_per_kg_pot = pot(fruits_per_kg))
``` ```
...@@ -131,7 +131,7 @@ left_join(fruits, recipe) ...@@ -131,7 +131,7 @@ left_join(fruits, recipe)
```{r Illustrating analytical results by plotting techniques} ```{r Illustrating analytical results by plotting techniques}
# Load and prepare data # Load and prepare data
Shower <- read.csv2("../data/Shower_data.csv") Shower <- read.csv2("../../data/Shower_data.csv")
Shower$group <- as.factor(Shower$group) Shower$group <- as.factor(Shower$group)
levels(Shower$group) <- c("First group", "Second group", "Fourth group", levels(Shower$group) <- c("First group", "Second group", "Fourth group",
"Third group", "Fifth group", "Sixth group") "Third group", "Fifth group", "Sixth group")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment