###############R Workshop Code####################
###R Baiscs
#Hashtags allow you to write comments to yourself
#Really useful when you're making notes on the go
A <- 3
a <- 4
A
a
#Objects
X <- 2 #X is the object and it is now holding the number 2 -- X is also a scalar
X <- c(2,2,4,5) #X is the object holding the numbers 2,2,4,5 -- X is now a vector
#the 'c' function above means concetenate
X <- matrix(c(1,1,2,2,3,3), nrow=2, ncol=3)
Y <- matrix(c(1,1,1,1,1,1), nrow=3, ncol=2)
A <- c(1,2,3,4)
B <- c('T','F','T','F') #B is now a character vector -- anytime you use character in R you have to use either '' or "", but don't mix them like '"...that's just crazy
ds <- data.frame(A,B) #ds is now a dataframe, which is exactly what the SPSS data window is
names(ds) <- c('Var1', 'Var2') #Giving names to your variables
Var1
#You just received an error saying "Error: object 'Var1' not found"
ds$Var1
attach(ds) #attaching a dataset means that you can reference the variables in the dataset using only their name
Var1
# rm(ds) removes the dataset...lets not do that yet though...
#Arithmetic
2 + 2
2 - 2
2*3
2/3
#Boolean Operators
2 > 3
3 < 6
4 == 4
#Matrix Algebra
X%*%Y #Matrix multiplication
t(X) #Transpose of X
ginv(X) #Gives you the invere of X
#You probably just received an error telling you that the function ginv could not be found or something like that
#This means we have to load a package
library(MASS) #Loads the package MASS
ginv(X)
#####################Data Management Section
#A few options...you can A) write out the entire location of the file you want to import or B)set your R directory
#We're going to go with Option B, so click file then Change dir...now find the folder you've saved all of the datasets in
#Importing the Moderated Mediation Dataset
MMDS <- read.csv('Mod Med Dataset.csv')
#Importing Logistic Regression Dataset
LRDS <- read.csv('Logistic Reg Dataset.csv')
#Importing HLM Dataset
HLMDS <- read.csv('HLM Dataset.csv')
#Importing SEM Dataset
SEMDS <- read.csv('SEM Dataset.csv')
#Adding variables to a dataframe
ds$C <- c(4,3,2,1) #This attached a new variable to the dataframe "ds"
ds <- ds[,-3]
ds[,2]
ds[1,]
ds[2,2]
#######################Descriptive Statistics Section
set.seed(617418) #Set the seed so we generate the same "random" numbers
X <- round(runif(100,1,6)) #Quick note...you're making an object X that contains 100 randomly generated values from a uniform distribution bounded between 1 and 6. The round command gets rid of decimals
#Central Tendency
mean(X)
median(X)
table(X)
#Dispersion
var(X)
sd(X)
range(X)
#Measures of covariation
#First lets do a tiny simulation
set.seed(618418) #Set the seed so we all generate the same random numbers
X <- rnorm(10000,3,sqrt(3)) #Generated a random variable X from a normal distribution with a mean of 3 and variance of 3
Y <- X*2 + rnorm(10000,2,sqrt(15 - var(X*2))) #You've no just simulated a random variable Y that should covary with X at 6 and correlate at around .89
cov(X,Y) #Covariance
cor(X,Y) #Correlation
##################Inferential Statistics
#General Linear Model
modlm1 <- lm(Y ~ X, data=MMDS)
modlm2 <- lm(Y ~ 1 + X, data=MMDS) #1 just represents the intercept
modlm3 <- lm(Y ~ X + Z, data=MMDS)
modtest <- anova(modlm1, modlm3) #Test models against one another
plot(modlm1$resid)
##Moderated Mediation -- Total Effects Moderation
#Making centered Variables and product terms
MMDS$Xc <- MMDS$X - mean(MMDS$X)
MMDS$Zc <- MMDS$Z - mean(MMDS$Z)
MMDS$Mc <- MMDS$M - mean(MMDS$M)
MMDS$XZc <- MMDS$Xc*MMDS$Zc
MMDS$MZc <- MMDS$Mc*MMDS$Zc
mod1 <- lm(M ~ Xc + Zc + XZc, data= MMDS) #Putting the results of your linear model (lm) into an R object labeled mod1
summary(mod1) #Gives you all of your model information
str(mod1) #allows you to examine the structure of the object mod1 and potentially reference single elements in that structure
mod1$coefficients
mod2 <- lm(Y ~ Xc + Zc + Mc + XZc + MZc, data= MMDS)
summary(mod2)
##You are going to want to get bootstrap estimates of the product term since product terms are not normally distributed
library(boot)
#Getting the bootstrapped coefficients
lmcoef <- function(formula, data, indices) { #creates a function which is similar to a SAS macro...don't worry too much about this
d <- MMDS[indices,]
fit <- lm(formula, data=d)
return(coef(fit))
}
mod1r <- boot(data=MMDS, statistic=lmcoef, R=1000, formula=M ~ Xc + Zc + XZc)
boot.ci(mod1r, type='bca', index = 4) #BCA is bootstrap adjusted confidence interval and index lets the boot.ci function know what variable to choose
mod2r <- boot(data = MMDS, statistic=lmcoef, R=1000, formula= Y ~ Xc + Zc + Mc + XZc + MZc)
boot.ci(mod2r, type='bca', type=5) #Bootstrap estimate of the product term XZc
boot.ci(mod2r, type='bca', type=5) #Bootstrap estimate of the product term MZc
########Logistic Regression
#Lets look at the Y variable
plot(LRDS$y) #definitely does not look normal...
#So let's turn to the generalized linear model
mod1LR <- glm(y ~ x1, family=binomial, data=LRDS)
summary(mod1LR)
mod1LR <- glm(y ~ x1 + x2, family=binomial, data=LRDS) #Family lets R know what link function to use
summary(mod2LR)
modtest <- anova(mod1LR, mod2LR)
pchisq(modtest$Deviance[2], modtest$Df, lower.tail=F) #Compares the 2 models using a difference in chisquare test
######Hierarchical Linear Modeling
library(lme4) #opens HLM package
#First we can fit a null or empty model
nmod <- lmer(y ~ 1 + (1|group), data=HLMDS, REML=F) #This allows you to test for a random intercept that varies by group (1|group)
ismod <- lmer(y ~ xgc + (1 + xgc|group), data=HLMDS, REML=F) #REML=F tells R to estimate this modeling using Full Maximum Likelihood not Restricted Max Likelihood
slopetest <- anova(nmod,ismod)
fullmod <- lmer(y ~ xgc + z + (xgc|group), data=HLMDS, REML=F)
fullmodtest <- anova(ismod,fullmod)
################Latent Variable Modeling
##Creating the measurement model
library(sem) #need the sem package
mod1 <- specifyModel()
LV1 -> X1, lam11 #Giving labels to each factor loading and allowing it to be freely estimated
LV1 -> X2, lam21
LV1 -> X3, lam31
LV1 -> X4, lam41
LV2 -> Y1, lam12
LV2 -> Y2, lam22
LV2 -> Y3, lam32
LV2 -> Y4, lam42
LV3 -> Y5, lam13
LV3 -> Y6, lam23
LV3 -> Y7, lam33
LV3 -> Y8, lam43
LV1 <-> LV1, NA, 1 #Controlling latent factor variances
LV2 <-> LV2, NA, 1
LV3 <-> LV3, NA, 1
LV1 <-> LV2, Ph12
LV1 <-> LV3, Ph13
LV2 <-> LV3, Ph23
mod1.sem <- sem(mod1, data=SEMDS) #saving your model in mod1.sem
#Now creating your latent structural model or path model aka Structural Equation Modeling
mod2 <- specifyModel()
LV1 -> X1, lam11
LV1 -> X2, lam21
LV1 -> X3, lam31
LV1 -> X4, lam41
LV2 -> Y1, NA, 1
LV2 -> Y2, lam22
LV2 -> Y3, lam32
LV2 -> Y4, lam42
LV3 -> Y5, NA, 1
LV3 -> Y6, lam23
LV3 -> Y7, lam33
LV3 -> Y8, lam43
LV1 <-> LV1, NA, 1
LV1 -> LV2, G21
LV1 -> LV3, G31
LV2 -> LV3, G32
mod2.sem <- sem(mod2, data=SEMDS)
coef(mod2.sem, standardized=T)
##Creating an alternative model where the path from LV1 to LV3 is no longer modeled
mod3 <- specifyModel()
LV1 -> X1, lam11
LV1 -> X2, lam21
LV1 -> X3, lam31
LV1 -> X4, lam41
LV2 -> Y1, NA, 1
LV2 -> Y2, lam22
LV2 -> Y3, lam32
LV2 -> Y4, lam42
LV3 -> Y5, NA, 1
LV3 -> Y6, lam23
LV3 -> Y7, lam33
LV3 -> Y8, lam43
LV1 <-> LV1, NA, 1
LV1 -> LV2, G21
LV2 -> LV3, G32
mod3.sem <- sem(mod3, data=SEMDS)
#testing model 2 against model 3
anova(mod2.sem,mod3.sem)