Top profile Call Girls In Satna [ 7014168258 ] Call Me For Genuine Models We ...
R Code for EM Algorithm
1. R Code
For
Expectation-Maximization (EM)
Algorithm for Gaussian Mixtures
Avjinder Singh Kaler
This is the R code for the EM algorithm. The code covers 1D data, 2D data and a dataset with 3 clusters.
One can modify this code and use it for one's own project.
2. Expectation-Maximization (EM) is an iterative
algorithm for finding maximum likelihood estimates of
parameters in statistical models, where the model depends
on unobserved latent variables. The EM iteration alternates
between performing an expectation (E) step, which creates a
function for the expectation of the log-likelihood evaluated
using the current estimate for the parameters, and a
maximization (M) step, which computes parameters
maximizing the expected log-likelihood found on the E step.
These steps are iterated until convergence is detected. In simple
language, each iteration consists of an E-step and an M-step.
Convergence is generally detected by computing the value of
the log-likelihood after each iteration and halting when it
appears not to be changing in a significant manner from one
iteration to the next.
# Faithful dataset, 1D
#
# load data
#
data(faithful)
Y <- faithful$waiting
#
# plot histogram of waiting times (written to geyser.eps)
#
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, so relying on them is fragile.
postscript("geyser.eps", width = 5, height = 3, onefile = FALSE)
par(mar = c(2, 2, 1, 1), mgp = c(1, 0.5, 0), cex = 0.7, lwd = 0.5, las = 1)
hist(
  Y,
  breaks = seq(40, 100, 3),
  xlab = "Waiting time between eruptions (min)",
  xlim = c(40, 100),
  main = "",
  axes = FALSE
)
# axes are drawn by hand so that they cross at (40, 0)
axis(1, seq(40, 100, 6), pos = 0)
axis(2, seq(0, 35, 5), pos = 40, las = 1)
dev.off()
#
# negative log-likelihood function
#
# Negative log-likelihood of a two-component univariate Gaussian mixture.
#   p: c(weight of component 1, mean 1, mean 2, variance 1, variance 2)
#   Y: numeric vector of observations
# Returns minus the sum over Y of the log mixture density.
ln <- function(p, Y) {
  dens_first <- p[1] * dnorm(Y, p[2], sqrt(p[4]))
  dens_second <- (1 - p[1]) * dnorm(Y, p[3], sqrt(p[5]))
  -sum(log(dens_first + dens_second))
}
#
# EM algorithm
#
# One EM update for a two-component univariate Gaussian mixture.
#   Y: numeric vector of observations
#   p: c(weight of component 1, mean 1, mean 2, variance 1, variance 2)
# Returns p with its first five entries replaced by the updated estimates.
emstep <- function(Y, p) {
  # E-step: posterior probability that each observation came from component 1
  joint_first <- p[1] * dnorm(Y, p[2], sqrt(p[4]))
  joint_second <- (1 - p[1]) * dnorm(Y, p[3], sqrt(p[5]))
  resp_first <- joint_first / (joint_first + joint_second)
  resp_second <- 1 - resp_first
  # M-step: weighted means first, because the variances are taken around them
  mean_first <- sum(resp_first * Y) / sum(resp_first)
  mean_second <- sum(resp_second * Y) / sum(resp_second)
  var_first <- sum(resp_first * (Y - mean_first)^2) / sum(resp_first)
  var_second <- sum(resp_second * (Y - mean_second)^2) / sum(resp_second)
  p[1:5] <- c(
    mean(resp_first),
    mean_first, mean_second,
    var_first, var_second
  )
  p
}
# Run n successive EM updates starting from p.
#   Y: numeric vector of observations
#   p: parameter vector passed to, and returned by, emstep()
#   n: number of EM steps to take (default 10); n = 0 returns p unchanged
# Returns the parameter vector after n calls to emstep().
emiteration <- function(Y, p, n = 10) {
  # seq_len() gives an empty sequence for n = 0, whereas 1:n would count
  # down through c(1, 0) and silently run two steps.
  for (i in seq_len(n)) {
    p <- emstep(Y, p)
  }
  p
}
#
# starting values: c(weight, mean 1, mean 2, variance 1, variance 2)
#
p <- c(0.5, 40, 90, 16, 16)
#
# 2 iterations of EM algorithm
#
p <- emiteration(Y, p, 2)
p
# check for convergence: take one more step and compare with the values above
p <- emstep(Y, p)
p
#
# plot histogram with fitted distribution
#
hist(
  Y,
  breaks = seq(40, 100, 3),
  xlab = "Waiting time between eruptions (min)",
  xlim = c(40, 100),
  main = "",
  axes = FALSE
)
axis(1, seq(40, 100, 6), pos = 0)
axis(2, seq(0, 35, 5), pos = 40, las = 1)
x <- seq(40, 100, 0.1)
y <- p[1] * dnorm(x, p[2], sqrt(p[4])) + (1 - p[1]) * dnorm(x, p[3], sqrt(p[5]))
# scale the density to histogram counts: bin width (3) times sample size
lines(x, y * 3 * length(Y))
#5 iterations of EM algorithm
# negative log-likelihood function
#
# Negative log-likelihood of a two-component univariate Gaussian mixture.
#   p: c(weight of component 1, mean 1, mean 2, variance 1, variance 2)
#   Y: numeric vector of observations
# Returns minus the sum over Y of the log mixture density.
ln <- function(p, Y) {
  mixture_density <- p[1] * dnorm(Y, p[2], sqrt(p[4])) +
    (1 - p[1]) * dnorm(Y, p[3], sqrt(p[5]))
  log_density <- log(mixture_density)
  -sum(log_density)
}
#
# EM algorithm
#
# One EM update for a two-component univariate Gaussian mixture.
#   Y: numeric vector of observations
#   p: c(weight of component 1, mean 1, mean 2, variance 1, variance 2)
# Returns p with its first five entries replaced by the updated estimates.
emstep <- function(Y, p) {
  # E-step: posterior probability that each observation came from component 1
  joint_first <- p[1] * dnorm(Y, p[2], sqrt(p[4]))
  joint_second <- (1 - p[1]) * dnorm(Y, p[3], sqrt(p[5]))
  resp_first <- joint_first / (joint_first + joint_second)
  resp_second <- 1 - resp_first
  # M-step: weighted means first, because the variances are taken around them
  mean_first <- sum(resp_first * Y) / sum(resp_first)
  mean_second <- sum(resp_second * Y) / sum(resp_second)
  var_first <- sum(resp_first * (Y - mean_first)^2) / sum(resp_first)
  var_second <- sum(resp_second * (Y - mean_second)^2) / sum(resp_second)
  p[1:5] <- c(
    mean(resp_first),
    mean_first, mean_second,
    var_first, var_second
  )
  p
}
# Run n successive EM updates starting from p.
#   Y: numeric vector of observations
#   p: parameter vector passed to, and returned by, emstep()
#   n: number of EM steps to take (default 10); n = 0 returns p unchanged
# Returns the parameter vector after n calls to emstep().
emiteration <- function(Y, p, n = 10) {
  # seq_len() gives an empty sequence for n = 0, whereas 1:n would count
  # down through c(1, 0) and silently run two steps.
  for (i in seq_len(n)) {
    p <- emstep(Y, p)
  }
  p
}
#
# starting values: c(weight, mean 1, mean 2, variance 1, variance 2)
#
p <- c(0.5, 40, 90, 16, 16)
#
# 5 iterations of EM algorithm
#
p <- emiteration(Y, p, 5)
p
# check for convergence: take one more step and compare with the values above
p <- emstep(Y, p)
p
#
# plot histogram with fitted distribution
#
hist(
  Y,
  breaks = seq(40, 100, 3),
  xlab = "Waiting time between eruptions (min)",
  xlim = c(40, 100),
  main = "",
  axes = FALSE
)
axis(1, seq(40, 100, 6), pos = 0)
axis(2, seq(0, 35, 5), pos = 40, las = 1)
x <- seq(40, 100, 0.1)
y <- p[1] * dnorm(x, p[2], sqrt(p[4])) + (1 - p[1]) * dnorm(x, p[3], sqrt(p[5]))
# scale the density to histogram counts: bin width (3) times sample size
lines(x, y * 3 * length(Y))
#10 iterations of EM algorithm
# negative log-likelihood function
#
# Negative log-likelihood of a two-component univariate Gaussian mixture.
#   p: c(weight of component 1, mean 1, mean 2, variance 1, variance 2)
#   Y: numeric vector of observations
# Returns minus the sum over Y of the log mixture density.
ln <- function(p, Y) {
  weighted_first <- p[1] * dnorm(Y, p[2], sqrt(p[4]))
  weighted_second <- (1 - p[1]) * dnorm(Y, p[3], sqrt(p[5]))
  total_log_density <- sum(log(weighted_first + weighted_second))
  -total_log_density
}
#
# EM algorithm
#
# One EM update for a two-component univariate Gaussian mixture.
#   Y: numeric vector of observations
#   p: c(weight of component 1, mean 1, mean 2, variance 1, variance 2)
# Returns p with its first five entries replaced by the updated estimates.
emstep <- function(Y, p) {
  # E-step: posterior probability that each observation came from component 1
  joint_first <- p[1] * dnorm(Y, p[2], sqrt(p[4]))
  joint_second <- (1 - p[1]) * dnorm(Y, p[3], sqrt(p[5]))
  resp_first <- joint_first / (joint_first + joint_second)
  resp_second <- 1 - resp_first
  # M-step: mixing weight, then weighted means, then variances around the
  # freshly updated means
  p[1] <- mean(resp_first)
  p[2] <- sum(resp_first * Y) / sum(resp_first)
  p[3] <- sum(resp_second * Y) / sum(resp_second)
  p[4] <- sum(resp_first * (Y - p[2])^2) / sum(resp_first)
  p[5] <- sum(resp_second * (Y - p[3])^2) / sum(resp_second)
  p
}
# Run n successive EM updates starting from p.
#   Y: numeric vector of observations
#   p: parameter vector passed to, and returned by, emstep()
#   n: number of EM steps to take (default 10); n = 0 returns p unchanged
# Returns the parameter vector after n calls to emstep().
emiteration <- function(Y, p, n = 10) {
  # seq_len() gives an empty sequence for n = 0, whereas 1:n would count
  # down through c(1, 0) and silently run two steps.
  for (i in seq_len(n)) {
    p <- emstep(Y, p)
  }
  p
}
#
# starting values: c(weight, mean 1, mean 2, variance 1, variance 2)
#
p <- c(0.5, 40, 90, 16, 16)
#
# 10 iterations of EM algorithm
#
p <- emiteration(Y, p, 10)
p
# check for convergence: take one more step and compare with the values above
p <- emstep(Y, p)
p
#
# plot histogram with fitted distribution
#
hist(
  Y,
  breaks = seq(40, 100, 3),
  xlab = "Waiting time between eruptions (min)",
  xlim = c(40, 100),
  main = "",
  axes = FALSE
)
axis(1, seq(40, 100, 6), pos = 0)
axis(2, seq(0, 35, 5), pos = 40, las = 1)
x <- seq(40, 100, 0.1)
y <- p[1] * dnorm(x, p[2], sqrt(p[4])) + (1 - p[1]) * dnorm(x, p[3], sqrt(p[5]))
# scale the density to histogram counts: bin width (3) times sample size
lines(x, y * 3 * length(Y))
# Faithful dataset, 2D
# load data
#
#
# Bivariate mixture model
#
data(faithful)
Y <- faithful$waiting
X <- faithful$eruptions
# scatter plot of eruption time against waiting time; TRUE/FALSE are spelled
# out because T and F are ordinary variables that can be reassigned
plot(
  Y, X,
  xlab = "Waiting time between eruptions (min)",
  ylab = "Eruption times (min)",
  xlim = c(40, 100), ylim = c(0, 6),
  main = "", axes = FALSE, cex = 0.7
)
# axes are drawn by hand so that they cross at (40, 0)
axis(1, seq(40, 100, 6), pos = 0)
axis(2, seq(0, 6, 1), pos = 40, las = 1)
#
# density of bivariate normal
#
# Evaluates the normal density with mean vector m and covariance matrix s
# at the single point x.
#   x: numeric vector (the point)
#   m: numeric vector (the mean), same length as x
#   s: square covariance matrix
# Returns the density as a 1 x 1 matrix (the result of the quadratic form).
dbinorm <- function(x, m, s) {
  centred <- x - m
  quad_form <- t(centred) %*% solve(s) %*% centred
  norm_const <- 1 / sqrt(det(2 * pi * s))
  norm_const * exp(-0.5 * quad_form)
}
# Density of a two-component normal mixture at the point x.
#   x: the point, passed through to dbinorm()
#   p: list with mixing weight p$p, means p$m1 and p$m2, and
#      covariance matrices p$s1 and p$s2
# Returns p$p times the first component density plus (1 - p$p) times the
# second component density.
f.hat <- function(x, p) {
  weight_first <- p$p
  comp_first <- dbinorm(x, p$m1, p$s1)
  comp_second <- dbinorm(x, p$m2, p$s2)
  weight_first * comp_first + (1 - weight_first) * comp_second
}
Estep<-function(x,p) {
p$p*dbinorm(x,p$m1,p$s1)/