因子分析には共通因子ξ(グザイ、xi)が共通係数γ(gamma)の線形回帰によって定義されると考える。その他のτなどはアプリオリに決める必要がある。
従来の因子分析に比べて、切片と傾きが求めることができるので、当てはめがよくなる。
モデルは以下の通り。
model{
for(i in 1:50){
for(j in 1:5){
mu[i,j] <- gamma[j,1] + gamma[j,2]*xi[i]
y[i,j] ~ dnorm(mu[i,j],tau[j])
}
}
# ξ=xi、グザイ
for(i in 1:50){
xistar[i] ~ dnorm(0,10)
xi[i] <- (xistar[i] -mean(xistar[]))/sd(xistar[])
}
for(j in 1:5){
gamma[j,1:2] ~ dmnorm(g0[1:2],G0[1:2,1:2])
tau[j] ~ dgamma(.01, .01)
omega[j] <- 1/sqrt(tau[j])
}
}
R側のコマンドはこんな感じになる。
if(exists("foo")) rm(foo)
foo <- list()
library(base)
data <- state.x77[,2:6]
data[,1] <- data[,1]/1000
## 相関行列をざっと見る
panel.cor <- function(x, y, digits=2, prefix="", cex.cor)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- abs(cor(x, y))
txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex <- 0.8/strwidth(txt)
test <- cor.test(x,y)
# borrowed from printCoefmat
Signif <- symnum(test$p.value, corr = FALSE, na = FALSE,
cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
symbols = c("***", "**", "*", ".", " "))
text(0.5, 0.5, txt, cex = cex * r)
text(.8, .8, Signif, cex=cex, col=2)
}
## pairs(data, lower.panel=panel.smooth, upper.panel=panel.cor)
res <- princomp(data)
summary(res)
## eigenvalue
eigen(cor(data))
ystar <- sweep(data,2,apply(data,2,mean))
## conventional factor analysis
f1 <- factanal(data,1,scores="regression")
lambda <- unclass(f1$loadings)
psi <- diag(f1$uniquenesses)
sigma <- var(data)
gamma <- t(lambda)%*%solve(psi)%*%lambda
beta <- t(solve(1 + gamma)%*%t(lambda)%*%solve(psi))
x1 <- scale(data,TRUE,TRUE)%*%beta
x0 <- f1$scores
forJags <- list(g0=rep(0,2), G0=diag(.01,2,2), y=matrix(as.numeric(data),50,5))
g <- matrix(NA, 5, 2)
omega <- rep(NA, 5)
for(j in 1:5){
tmpReg <- lm(data[,j] ~ x0)
g[j,] <- coef(tmpReg)
omega[j] <- summary(tmpReg)$sigma
}
inits <- list(list(gamma=g, tau=1/omega^2, xistar=as.vector(x0), .RNG.name="base::Mersenne-Twister",.RNG.seed=1264))
require(rjags)
foo <- jags.model(file="factor.bug", data=forJags, inits=inits)
update(foo, n.iter=1000)
res <- coda.samples(foo, variable.names=c("xi","gamma"), n.iter=50000)
結果は以下の通り。
Jackmanはこんな風に美しいグラフを描く。
extractFromCoda <- function(coda,vname){
dnames <- dimnames(coda[[1]])[[2]]
theString <- paste("^",vname,sep="")
cat(paste("searching for",theString,"in MCMC output\n"))
theOnes <- grep(theString,
dnames)
cat(paste("found",length(theOnes),"matching columns\n"))
as.matrix(coda[[1]][,theOnes])
}
x <- extractFromCoda(res,"xi")
g <- extractFromCoda(res,"gamma")
## omega <- extractFromCoda(res,"omega")
nsims <- dim(x)[1]
## summaries for x
myfunc <- function(x)c(mean(x), quantile(x,c(.025,.975)))
xbar <- apply(x,2,myfunc)
xsd <- apply(x,2,sd)
xbar <- t(xbar)
dimnames(xbar) <- list(dimnames(state.x77)[[1]], c("Mean","2.5%","97.5%"))
gbar <- t(apply(g,2,myfunc)) ## summaries for gammas
##omegabar <- t(apply(omega,2,myfunc))
## r-squared
r2 <- matrix(NA,nsims,5)
yss <- apply(y,2,function(x)sum((x-mean(x))^2))
for(iter in 1:nsims){
rho <- cor(cbind(x[iter,],y))[2:6,1]
r2[iter,] <- rho^2
}
r2bar <- t(apply(r2,2,myfunc))
#########################################################################
## graphs
indx <- order(xbar[,1]) ## sort ideal point estimates
cols <- rep("black",50) ## colors
southernStates <- c("Louisiana","Mississippi","South Carolina","Alabama","Georgia","North Carolina","Kentucky", "Tennessee","Texas","Arkansas","Virginia","Florida")
cols[dimnames(xbar)[[1]] %in% southernStates] <- gray(.45)
##pdf(file="LatentVariable1.pdf", width=11.5,height=16)
png(file="LatentVariable1.png", width=600,height=1200)
par(mfrow=c(1,1), mar=c(2,0.1,2,0.1), omi=rep(0,4))
plotlims <- range(xbar)
axislims <- c(-2.5,2)
axisticks <- seq(from=axislims[1],to=axislims[2],by=.5)
axislabs <- as.character(axisticks)
textloc <- 1.01*xbar[,2]
textloc[textloc > -3] <- 1.01 * -3
## empty plotting region
plot(x=plotlims,y=c(1,50), type="n", cex=.5, ylim=c(.5,50.5),yaxs="i", axes=FALSE, xlab="",ylab="")
for(i in 1:50){
## line for confidence interval
lines(y=c(i,i), x=c(xbar[indx[i],2],xbar[indx[i],3]), lwd=2.5)
## overlay posterior mean
points(y=i,x=xbar[indx[i],1],col=cols[indx[i]], pch=19,cex=1.25)
## text, state name
text(xbar[indx[i],3],i, dimnames(xbar)[[1]][indx[i]], adj=-0.1)
}
axis(1,at=axisticks,labels=axislabs,cex=.5,lwd=1.5)
axis(3,at=axisticks,labels=axislabs,cex=.5,lwd=1.5)
dev.off()
MCMC因子分析
 | 伝統的因子分析
 |