# ---- Gender vs. Clarity LACE score ------------------------------------------
# Explores whether Clarity.LACE..Score differs by Gender in `training2`.
# Every column is referenced explicitly through `tt`, so the results do not
# depend on what happens to be on the search path.
tt <- training2
# Kept once for any later interactive code that still uses bare column names;
# nothing in this section relies on it.
attach(tt, warn.conflicts = FALSE)

# Group labels, group sizes and per-group mean score.
levels(tt$Gender)
table(tt$Gender)
tapply(tt$Clarity.LACE..Score, tt$Gender, mean)

# retry the model
# Columns are named inside the formula and resolved through `data = tt`
# (no `tt$` inside the formula).
sl1 <- lm(Clarity.LACE..Score ~ factor(Gender), data = tt)
summary(sl1)

# Score distribution in the reference data (`dataz2`) and in the training set,
# stacked vertically. The previous graphics settings are restored at the end.
old_par <- par(mfrow = c(2, 1))
hist(dataz2$Clarity.LACE..Score)
hist(tt$Clarity.LACE..Score)
# The LACE score does not appear to follow a normal distribution, so is ANOVA
# appropriate here?
anova(sl1)

# Group sizes and score by gender, side by side.
par(mfrow = c(1, 2))
# boxplot(tt$Gender=="Male",tt$Clarity.LACE..Score)
# boxplot(tt$Gender=="Female",tt$Clarity.LACE..Score)
# boxplot(tt$Gender,tt$Clarity.LACE..Score)
# boxplot(factor(tt$Gender,levels = c("Male","Female")),tt$Clarity.LACE..Score)
plot(tt$Gender)
plot(tt$Gender, tt$Clarity.LACE..Score)
par(old_par)

# Two-sample t-test of the score between the gender groups. t.test() needs the
# numeric response and the grouping factor, not a fitted lm object or the
# grouping column on its own.
# NOTE(review): the formula method requires exactly two Gender groups - confirm
# against the table() output above.
t.test(Clarity.LACE..Score ~ Gender, data = tt)
# Packages used by this script.
pkg <- c("readr", "readxl", "dplyr", "stringr", "ggplot2", "tidyr", "car")

# Attach every package. `require` is passed as a function and the extra
# arguments are forwarded by lapply(); writing `require(., ...)` in the FUN
# slot would evaluate the call immediately instead of passing a function.
# `pkgload` is a list with one TRUE/FALSE per package.
pkgload <- lapply(pkg, require, character.only = TRUE, warn.conflicts = FALSE)

# require() only returns FALSE on failure, so report anything that is missing
# instead of letting a later call fail with an unrelated error.
pkg_missing <- pkg[!unlist(pkgload)]
if (length(pkg_missing) > 0) {
  warning(
    "Packages not available: ", paste(pkg_missing, collapse = ", "),
    call. = FALSE
  )
}
# data load and inspection
# Only the last read survives, and it was made with header = FALSE; the earlier
# attempts (default header, header = TRUE) were overwritten by it.
# NOTE(review): the object name `sample` shadows base::sample() for value
# lookups; it is kept unchanged because other code may refer to it.
sample <- read.csv("sample.csv", header = FALSE)
sample
# View() opens the data viewer and is only useful in an interactive session.
if (interactive()) {
  View(sample)
}
# # install.packages("MASS")
# library("MASS")
#
# re <- 1/9*(12+6+28+12+24+34)
# fractions(re)
#
#
# # suppressMessages(library(foo))
#
# # Preview HTML (Markdown and HTML)	Ctrl+Shift+K	Command+Shift+K
# Knit Document (knitr)	Ctrl+Shift+K	Command+Shift+K
# Compile Notebook	Ctrl+Shift+K	Command+Shift+K
# Compile PDF (TeX and Sweave)	Ctrl+Shift+K	Command+Shift+K
# Insert chunk (Sweave and Knitr)	Ctrl+Alt+I	Command+Option+I
# Insert code section	Ctrl+Shift+R	Command+Shift+R
# Run current line/selection	Ctrl+Enter	Command+Enter
# Run current line/selection (retain cursor position)	Alt+Enter	Option+Enter
# Re-run previous region	Ctrl+Shift+P	Command+Shift+P
# Run current document	Ctrl+Alt+R	Command+Option+R
# Run from document beginning to current line	Ctrl+Alt+B	Command+Option+B
# Run from current line to document end	Ctrl+Alt+E	Command+Option+E
# Run the current function definition	Ctrl+Alt+F	Command+Option+F
# Run the current code section	Ctrl+Alt+T	Command+Option+T
# Run previous Sweave/Rmd code	Ctrl+Alt+P	Command+Option+P
# Run the current Sweave/Rmd chunk	Ctrl+Alt+C	Command+Option+C
# Run the next Sweave/Rmd chunk	Ctrl+Alt+N	Command+Option+N
# Source a file	Ctrl+Shift+O	Command+Shift+O
# Source the current document	Ctrl+Shift+S	Command+Shift+S
# Source the current document (with echo)	Ctrl+Shift+Enter	Command+Shift+Enter
# Fold Selected	Alt+L	Cmd+Option+L
# Unfold Selected	Shift+Alt+L	Cmd+Shift+Option+L
# Fold All	Alt+O	Cmd+Option+O
# Unfold All	Shift+Alt+O	Cmd+Shift+Option+O
# Go to line	Shift+Alt+G	Cmd+Shift+Option+G
# Jump to	Shift+Alt+J	Cmd+Shift+Option+J
# Switch to tab	Ctrl+Shift+.	Ctrl+Shift+.
# Previous tab	Ctrl+F11	Ctrl+F11
# Next tab	Ctrl+F12	Ctrl+F12
# First tab	Ctrl+Shift+F11	Ctrl+Shift+F11
# Last tab	Ctrl+Shift+F12	Ctrl+Shift+F12
# Navigate back	Ctrl+F9	Cmd+F9
# Navigate forward	Ctrl+F10	Cmd+F10
# Extract function from selection	Ctrl+Alt+X	Command+Option+X
# Extract variable from selection	Ctrl+Alt+V	Command+Option+V
# Reindent lines	Ctrl+I	Command+I
# Comment/uncomment current line/selection	Ctrl+Shift+C	Command+Shift+C
# Reflow Comment	Ctrl+Shift+/	Command+Shift+/
# Reformat Selection	Ctrl+Shift+A	Command+Shift+A
# Show Diagnostics	Ctrl+Shift+Alt+P	Command+Shift+Alt+P
# Transpose Letters	 	Ctrl+T
# Move Lines Up/Down	Alt+Up/Down	Option+Up/Down
# Copy Lines Up/Down	Shift+Alt+Up/Down	Command+Option+Up/Down
# Jump to Matching Brace/Paren	Ctrl+P	Ctrl+P
# Expand to Matching Brace/Paren	Ctrl+Shift+E	Ctrl+Shift+E
# Select to Matching Brace/Paren	Ctrl+Shift+Alt+E	Ctrl+Shift+Alt+E
# Add Cursor Above Current Cursor	Ctrl+Alt+Up	Ctrl+Alt+Up
# Add Cursor Below Current Cursor	Ctrl+Alt+Down	Ctrl+Alt+Down
# Move Active Cursor Up	Ctrl+Alt+Shift+Up	Ctrl+Alt+Shift+Up
# Move Active Cursor Down	Ctrl+Alt+Shift+Down	Ctrl+Alt+Shift+Down
# Find and Replace	Ctrl+F	Command+F
# Find Next	Win: F3, Linux: Ctrl+G	Command+G
# Find Previous	Win: Shift+F3, Linux: Ctrl+Shift+G	Command+Shift+G
# Use Selection for Find	Ctrl+F3	Command+E
# Replace and Find	Ctrl+Shift+J	Command+Shift+J
# Find in Files	Ctrl+Shift+F	Command+Shift+F
# Check Spelling	F7	F7
#
# Editing (Console and Source)
#
# Description	Windows & Linux	Mac
# Undo	Ctrl+Z	Command+Z
# Redo	Ctrl+Shift+Z	Command+Shift+Z
# Cut	Ctrl+X	Command+X
# Copy	Ctrl+C	Command+C
# Paste	Ctrl+V	Command+V
# Select All	Ctrl+A	Command+A
# Jump to Word	Ctrl+Left/Right	Option+Left/Right
# Jump to Start/End	Ctrl+Home/End or Ctrl+Up/Down	Command+Home/End or Command+Up/Down
# Delete Line	Ctrl+D	Command+D
# Select	Shift+[Arrow]	Shift+[Arrow]
# Select Word	Ctrl+Shift+Left/Right	Option+Shift+Left/Right
# Select to Line Start	Alt+Shift+Left	Command+Shift+Left
# Select to Line End	Alt+Shift+Right	Command+Shift+Right
# Select Page Up/Down	Shift+PageUp/PageDown	Shift+PageUp/Down
# Select to Start/End	Ctrl+Shift+Home/End or Shift+Alt+Up/Down	Command+Shift+Up/Down
# Delete Word Left	Ctrl+Backspace	Option+Backspace or Ctrl+Option+Backspace
# Delete Word Right	 	Option+Delete
# Delete to Line End	 	Ctrl+K
# Delete to Line Start	 	Option+Backspace
# Indent	Tab (at beginning of line)	Tab (at beginning of line)
# Outdent	Shift+Tab	Shift+Tab
# Yank line up to cursor	Ctrl+U	Ctrl+U
# Yank line after cursor	Ctrl+K	Ctrl+K
# Insert currently yanked text	Ctrl+Y	Ctrl+Y
# Insert assignment operator	Alt+-	Option+-
# Insert pipe operator	Ctrl+Shift+M	Cmd+Shift+M
# Show help for function at cursor	F1	F1
# Show source code for function at cursor	F2	F2
# Find usages for symbol at cursor (C++)	Ctrl+Alt+U	Cmd+Option+U
#
# Completions (Console and Source)
#
# Description	Windows & Linux	Mac
# Attempt completion	Tab or Ctrl+Space	Tab or Command+Space
# Navigate candidates	Up/Down	Up/Down
# Accept selected candidate	Enter, Tab, or Right	Enter, Tab, or Right
# Dismiss completion popup	Esc	Esc
#
# Views
#
# Description	Windows & Linux	Mac
# Move focus to Source Editor	Ctrl+1	Ctrl+1
# Move focus to Console	Ctrl+2	Ctrl+2
# Move focus to Help	Ctrl+3	Ctrl+3
# Show History	Ctrl+4	Ctrl+4
# Show Files	Ctrl+5	Ctrl+5
# Show Plots	Ctrl+6	Ctrl+6
# Show Packages	Ctrl+7	Ctrl+7
# Show Environment	Ctrl+8	Ctrl+8
# Show Git/SVN	Ctrl+9	Ctrl+9
# Show Build	Ctrl+0	Ctrl+0
# Sync Editor & PDF Preview	Ctrl+F8	Cmd+F8
# Show Keyboard Shortcut Reference	Alt+Shift+K	Option+Shift+K
#
# Build
#
# Description	Windows & Linux	Mac
# Build and Reload	Ctrl+Shift+B	Cmd+Shift+B
# Load All (devtools)	Ctrl+Shift+L	Cmd+Shift+L
# Test Package (Desktop)	Ctrl+Shift+T	Cmd+Shift+T
# Test Package (Web)	Ctrl+Alt+F7	Cmd+Alt+F7
# Check Package	Ctrl+Shift+E	Cmd+Shift+E
# Document Package	Ctrl+Shift+D	Cmd+Shift+D
#
# Debug
#
# Description	Windows & Linux	Mac
# Toggle Breakpoint	Shift+F9	Shift+F9
# Execute Next Line	F10	F10
# Step Into Function	Shift+F4	Shift+F4
# Finish Function/Loop	Shift+F6	Shift+F6
# Continue	Shift+F5	Shift+F5
# Stop Debugging	Shift+F8	Shift+F8
#
# Plots
#
# Description	Windows & Linux	Mac
# Previous plot	Ctrl+Alt+F11	Command+Option+F11
# Next plot	Ctrl+Alt+F12	Command+Option+F12
#
# Git/SVN
#
# Description	Windows & Linux	Mac
# Diff active source document	Ctrl+Alt+D	Ctrl+Option+D
# Commit changes	Ctrl+Alt+M	Ctrl+Option+M
# Scroll diff view	Ctrl+Up/Down	Ctrl+Up/Down
# Stage/Unstage (Git)	Spacebar	Spacebar
# Stage/Unstage and move to next (Git)	Enter	Enter
#
# Session
#
# Description	Windows & Linux	Mac
# Quit Session (desktop only)	Ctrl+Q	Command+Q
# Restart R Session	Ctrl+Shift+F10	Command+Shift+F10
#
# Was this article helpful?
# 34 out of 37 found this helpful
# Facebook Twitter LinkedIn
# Comments
#
# Related articles
#
# Customizing Keyboard Shortcuts
# Debugging with RStudio
# Using the Data Viewer
# Customizing RStudio
# Version Control with Git and SVN
# Packages used by this script; `pkgload` holds one TRUE/FALSE per package.
pkg <- c("readr", "readxl", "dplyr", "stringr", "ggplot2", "tidyr", "car")
pkgload <- lapply(pkg, require, character.only = TRUE)

# require() does not stop on failure, so report missing packages explicitly.
pkg_missing <- pkg[!unlist(pkgload)]
if (length(pkg_missing) > 0) {
  warning(
    "Packages not available: ", paste(pkg_missing, collapse = ", "),
    call. = FALSE
  )
}

# data load and inspection
# Read once, with header = FALSE as in every recorded attempt.
sample <- read.csv("sample.csv", header = FALSE)
# sample
pkg <- c("readr", "readxl", "dplyr", "stringr", "ggplot2", "tidyr")

# Every 10th row index of `rpe` (defined outside this section).
seq(1, nrow(rpe), by = 10)

# Flag rows that look like a tackle: high acceleration impulse at low speed
# with a negative acceleration on at least one axis. The test is purely
# row-wise, so no grouping is needed (and none is left behind on `gps`).
gps <- gps %>%
  mutate(
    tackle = AccelImpulse > 3.5 & Speed < 1 &
      (AccelX < 0 | AccelY < 0 | AccelZ < 0)
  )

# Number consecutive blocks of 10 rows. `length.out` trims the final block so
# the assignment also works when nrow(gps) is not a multiple of 10, and
# seq_len() yields an empty index for an empty table.
gps$group <- rep(
  seq_len(ceiling(nrow(gps) / 10)),
  each = 10,
  length.out = nrow(gps)
)
tail(gps$group, 20)

# NOTE(review): this averages PlayerID values per game - confirm that a mean of
# IDs is really the intended statistic.
tapply(gps$PlayerID, gps$GameID, mean)
# seq_len(nrow(gps),length.out = 10)
# ?seq_len

# Per 10-row block: was any row flagged as a tackle, and the mean speed.
tackle_num <- gps %>%
  group_by(GameID, Half, PlayerID, group) %>%
  summarise(tackle = any(tackle), speedAverage = mean(Speed))
tackle_num

# Per game/half/player: number of blocks containing a tackle and the mean of
# the block speeds. `tackle_num` is still grouped by GameID, Half and PlayerID
# after the first summarise(), so it is reused instead of recomputed.
tackle_num2 <- tackle_num %>%
  summarise(tackle = sum(tackle), speedAverage = mean(speedAverage))
# Modelling packages. They are attached before the first train() call.
pkg <- c(
  "readr", "readxl", "dplyr", "stringr", "ggplot2", "tidyr", "car",
  "caret", "ranger", "rsample", "randomForest"
)
pkgload <- lapply(pkg, require, character.only = TRUE)
# library() stops immediately if one of the modelling packages is missing.
library("caret")
library("dplyr")
# library("xgboost")
library("ranger")
library("rsample")
library("randomForest")

# Fresh copy of mtcars with `hp` blanked out for every car with disp < 140,
# so the missingness depends on another predictor.
data(mtcars)
mtcars[mtcars$disp < 140, "hp"] <- NA
Y <- mtcars$mpg
X <- mtcars[, 2:4]

# Use median imputation
set.seed(42)
model <- train(
  x = X, y = Y, method = "glm",
  preProcess = "medianImpute"
)
print(min(model$results$RMSE))
# caret's "knnImpute" pre-processing needs the RANN package. Install it only
# when it is missing, then attach it before fitting.
if (!requireNamespace("RANN", quietly = TRUE)) {
  install.packages("RANN")
}
library("RANN")

# Fresh copy of mtcars with `hp` blanked out for every car with disp < 140.
data(mtcars)
mtcars[mtcars$disp < 140, "hp"] <- NA
Y <- mtcars$mpg
X <- mtcars[, 2:4]

# Use median imputation
set.seed(42)
model <- train(
  x = X, y = Y, method = "glm",
  preProcess = "medianImpute"
)
print(min(model$results$RMSE))

# Use KNN imputation (same seed, so both fits use the same resamples)
set.seed(42)
model <- train(
  x = X, y = Y,
  method = "glm",
  preProcess = "knnImpute"
)
print(min(model$results$RMSE))
# Compare to 3.61 for median imputation
# Generate some data with missing values
# `hp` is set to NA in 10 randomly chosen rows; the seed fixes which rows.
data(mtcars)
set.seed(42)
mtcars[sample(seq_len(nrow(mtcars)), 10), "hp"] <- NA
Y <- mtcars$mpg
X <- mtcars[, 2:4]

# Use linear model "recipe"
set.seed(42)
model <- train(
  x = X, y = Y, method = "glm",
  preProcess = c("medianImpute", "center", "scale")
)
print(min(model$results$RMSE))
# Console output recorded for this fit: [1] 3.612713

# spatial
set.seed(42)
model <- train(
  x = X, y = Y, method = "glm",
  preProcess = c("medianImpute", "center", "scale", "spatialSign")
)
min(model$results$RMSE)

# PCA before modeling
set.seed(42)
model <- train(
  x = X, y = Y, method = "glm",
  preProcess = c("medianImpute", "center", "scale", "pca")
)
min(model$results$RMSE)
# Reproduce dataset from last video
data(mtcars)
set.seed(42)
mtcars[sample(seq_len(nrow(mtcars)), 10), "hp"] <- NA
Y <- mtcars$mpg
X <- mtcars[, 2:4]

# Baseline: PCA recipe on the original predictors (no constant column yet).
model <- train(
  x = X, y = Y, method = "glm",
  preProcess = c("medianImpute", "center", "scale", "pca")
)
model
summary(model)

# Add constant-valued column to mtcars
X$bad <- 1

# Without "zv" the constant column cannot be scaled to unit variance, so this
# fit is expected to fail. The error is caught and reported so the script keeps
# running, and `model` is left untouched.
pca_with_constant <- tryCatch(
  train(
    x = X, y = Y, method = "glm",
    preProcess = c("medianImpute", "center", "scale", "pca")
  ),
  error = function(e) {
    message("train() without \"zv\" failed: ", conditionMessage(e))
    NULL
  }
)

# "zv" drops zero-variance columns before the remaining steps run.
set.seed(42)
model <- train(
  x = X, y = Y, method = "glm",
  preProcess = c("zv", "medianImpute", "center", "scale", "pca")
)
min(model$results$RMSE)

# IDE artifact recorded when an unsaved RStudio document was sourced; it is
# disabled so the script does not execute an unrelated temporary file.
# source('~/.active-rstudio-document', echo=TRUE)
# Scatter plot of resampling results for the "ROC" metric.
# NOTE(review): no object named `resamples` is assigned anywhere in the visible
# code, so this presumably expects the result of a resamples(list(...)) call on
# fitted models - TODO confirm and assign it before plotting.
# NOTE(review): the fits above only report RMSE; confirm that the models being
# compared actually provide a ROC metric.
xyplot(resamples, metric = "ROC")
